mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Merge pull request #5411 from crosbymichael/lockdown
Update default restrictions for exec drivers
This commit is contained in:
commit
44140f7909
22 changed files with 809 additions and 428 deletions
|
@ -59,9 +59,10 @@ func init() {
|
|||
}
|
||||
|
||||
type driver struct {
|
||||
root string // root path for the driver to use
|
||||
apparmor bool
|
||||
sharedRoot bool
|
||||
root string // root path for the driver to use
|
||||
apparmor bool
|
||||
sharedRoot bool
|
||||
restrictionPath string
|
||||
}
|
||||
|
||||
func NewDriver(root string, apparmor bool) (*driver, error) {
|
||||
|
@ -69,10 +70,15 @@ func NewDriver(root string, apparmor bool) (*driver, error) {
|
|||
if err := linkLxcStart(root); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
restrictionPath := filepath.Join(root, "empty")
|
||||
if err := os.MkdirAll(restrictionPath, 0700); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &driver{
|
||||
apparmor: apparmor,
|
||||
root: root,
|
||||
sharedRoot: rootIsShared(),
|
||||
apparmor: apparmor,
|
||||
root: root,
|
||||
sharedRoot: rootIsShared(),
|
||||
restrictionPath: restrictionPath,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -403,14 +409,16 @@ func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) {
|
|||
|
||||
if err := LxcTemplateCompiled.Execute(fo, struct {
|
||||
*execdriver.Command
|
||||
AppArmor bool
|
||||
ProcessLabel string
|
||||
MountLabel string
|
||||
AppArmor bool
|
||||
ProcessLabel string
|
||||
MountLabel string
|
||||
RestrictionSource string
|
||||
}{
|
||||
Command: c,
|
||||
AppArmor: d.apparmor,
|
||||
ProcessLabel: process,
|
||||
MountLabel: mount,
|
||||
Command: c,
|
||||
AppArmor: d.apparmor,
|
||||
ProcessLabel: process,
|
||||
MountLabel: mount,
|
||||
RestrictionSource: d.restrictionPath,
|
||||
}); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
|
|
@ -88,7 +88,9 @@ lxc.mount.entry = proc {{escapeFstabSpaces $ROOTFS}}/proc proc nosuid,nodev,noex
|
|||
|
||||
# WARNING: sysfs is a known attack vector and should probably be disabled
|
||||
# if your userspace allows it. eg. see http://bit.ly/T9CkqJ
|
||||
{{if .Privileged}}
|
||||
lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noexec 0 0
|
||||
{{end}}
|
||||
|
||||
{{if .Tty}}
|
||||
lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bind,rw 0 0
|
||||
|
@ -109,8 +111,15 @@ lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabS
|
|||
{{if .AppArmor}}
|
||||
lxc.aa_profile = unconfined
|
||||
{{else}}
|
||||
#lxc.aa_profile = unconfined
|
||||
# not unconfined
|
||||
{{end}}
|
||||
{{else}}
|
||||
# restrict access to proc
|
||||
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/sys none bind,ro 0 0
|
||||
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/irq none bind,ro 0 0
|
||||
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/acpi none bind,ro 0 0
|
||||
lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/sysrq-trigger none bind,ro 0 0
|
||||
lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/kcore none bind,ro 0 0
|
||||
{{end}}
|
||||
|
||||
# limits
|
||||
|
|
|
@ -25,6 +25,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
|
|||
container.Cgroups.Name = c.ID
|
||||
// check to see if we are running in ramdisk to disable pivot root
|
||||
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
|
||||
container.Context["restriction_path"] = d.restrictionPath
|
||||
|
||||
if err := d.createNetwork(container, c); err != nil {
|
||||
return nil, err
|
||||
|
@ -33,6 +34,8 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
|
|||
if err := d.setPrivileged(container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
container.Mounts = append(container.Mounts, libcontainer.Mount{Type: "devtmpfs"})
|
||||
}
|
||||
if err := d.setupCgroups(container, c); err != nil {
|
||||
return nil, err
|
||||
|
@ -81,6 +84,11 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error {
|
|||
c.Enabled = true
|
||||
}
|
||||
container.Cgroups.DeviceAccess = true
|
||||
|
||||
// add sysfs as a mount for privileged containers
|
||||
container.Mounts = append(container.Mounts, libcontainer.Mount{Type: "sysfs"})
|
||||
delete(container.Context, "restriction_path")
|
||||
|
||||
if apparmor.IsEnabled() {
|
||||
container.Context["apparmor_profile"] = "unconfined"
|
||||
}
|
||||
|
@ -99,7 +107,13 @@ func (d *driver) setupCgroups(container *libcontainer.Container, c *execdriver.C
|
|||
|
||||
func (d *driver) setupMounts(container *libcontainer.Container, c *execdriver.Command) error {
|
||||
for _, m := range c.Mounts {
|
||||
container.Mounts = append(container.Mounts, libcontainer.Mount{m.Source, m.Destination, m.Writable, m.Private})
|
||||
container.Mounts = append(container.Mounts, libcontainer.Mount{
|
||||
Type: "bind",
|
||||
Source: m.Source,
|
||||
Destination: m.Destination,
|
||||
Writable: m.Writable,
|
||||
Private: m.Private,
|
||||
})
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import (
|
|||
|
||||
const (
|
||||
DriverName = "native"
|
||||
Version = "0.1"
|
||||
Version = "0.2"
|
||||
BackupApparmorProfilePath = "apparmor/docker.back" // relative to docker root
|
||||
)
|
||||
|
||||
|
@ -62,6 +62,7 @@ type driver struct {
|
|||
root string
|
||||
initPath string
|
||||
activeContainers map[string]*exec.Cmd
|
||||
restrictionPath string
|
||||
}
|
||||
|
||||
func NewDriver(root, initPath string) (*driver, error) {
|
||||
|
@ -72,8 +73,14 @@ func NewDriver(root, initPath string) (*driver, error) {
|
|||
if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
restrictionPath := filepath.Join(root, "empty")
|
||||
if err := os.MkdirAll(restrictionPath, 0700); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &driver{
|
||||
root: root,
|
||||
restrictionPath: restrictionPath,
|
||||
initPath: initPath,
|
||||
activeContainers: make(map[string]*exec.Cmd),
|
||||
}, nil
|
||||
|
|
|
@ -665,3 +665,25 @@ func TestUnPrivilegedCannotMount(t *testing.T) {
|
|||
|
||||
logDone("run - test un-privileged cannot mount")
|
||||
}
|
||||
|
||||
func TestSysNotAvaliableInNonPrivilegedContainers(t *testing.T) {
|
||||
cmd := exec.Command(dockerBinary, "run", "busybox", "ls", "/sys/kernel")
|
||||
if code, err := runCommand(cmd); err == nil || code == 0 {
|
||||
t.Fatal("sys should not be available in a non privileged container")
|
||||
}
|
||||
|
||||
deleteAllContainers()
|
||||
|
||||
logDone("run - sys not avaliable in non privileged container")
|
||||
}
|
||||
|
||||
func TestSysAvaliableInPrivilegedContainers(t *testing.T) {
|
||||
cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "ls", "/sys/kernel")
|
||||
if code, err := runCommand(cmd); err != nil || code != 0 {
|
||||
t.Fatalf("sys should be available in privileged container")
|
||||
}
|
||||
|
||||
deleteAllContainers()
|
||||
|
||||
logDone("run - sys avaliable in privileged container")
|
||||
}
|
||||
|
|
|
@ -16,76 +16,149 @@ process are specified in this file. The configuration is used for each process
|
|||
Sample `container.json` file:
|
||||
```json
|
||||
{
|
||||
"mounts" : [
|
||||
{
|
||||
"type" : "devtmpfs"
|
||||
}
|
||||
],
|
||||
"tty" : true,
|
||||
"environment" : [
|
||||
"HOME=/",
|
||||
"PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin",
|
||||
"container=docker",
|
||||
"TERM=xterm-256color"
|
||||
],
|
||||
"hostname" : "koye",
|
||||
"cgroups" : {
|
||||
"parent" : "docker",
|
||||
"name" : "docker-koye"
|
||||
},
|
||||
"capabilities_mask" : [
|
||||
{
|
||||
"value" : 8,
|
||||
"key" : "SETPCAP",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"enabled" : false,
|
||||
"value" : 16,
|
||||
"key" : "SYS_MODULE"
|
||||
},
|
||||
{
|
||||
"value" : 17,
|
||||
"key" : "SYS_RAWIO",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"key" : "SYS_PACCT",
|
||||
"value" : 20,
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"value" : 21,
|
||||
"key" : "SYS_ADMIN",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"value" : 23,
|
||||
"key" : "SYS_NICE",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"value" : 24,
|
||||
"key" : "SYS_RESOURCE",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"key" : "SYS_TIME",
|
||||
"value" : 25,
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"enabled" : false,
|
||||
"value" : 26,
|
||||
"key" : "SYS_TTY_CONFIG"
|
||||
},
|
||||
{
|
||||
"key" : "AUDIT_WRITE",
|
||||
"value" : 29,
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"value" : 30,
|
||||
"key" : "AUDIT_CONTROL",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"enabled" : false,
|
||||
"key" : "MAC_OVERRIDE",
|
||||
"value" : 32
|
||||
},
|
||||
{
|
||||
"enabled" : false,
|
||||
"key" : "MAC_ADMIN",
|
||||
"value" : 33
|
||||
},
|
||||
{
|
||||
"key" : "NET_ADMIN",
|
||||
"value" : 12,
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"value" : 27,
|
||||
"key" : "MKNOD",
|
||||
"enabled" : true
|
||||
}
|
||||
],
|
||||
"networks" : [
|
||||
{
|
||||
"gateway" : "172.17.42.1",
|
||||
"mtu" : 1500,
|
||||
"address" : "127.0.0.1/0",
|
||||
"type" : "loopback",
|
||||
"gateway" : "localhost"
|
||||
},
|
||||
{
|
||||
"mtu" : 1500,
|
||||
"address" : "172.17.42.2/16",
|
||||
"type" : "veth",
|
||||
"context" : {
|
||||
"bridge" : "docker0",
|
||||
"prefix" : "veth"
|
||||
},
|
||||
"address" : "172.17.0.2/16",
|
||||
"type" : "veth",
|
||||
"mtu" : 1500
|
||||
}
|
||||
],
|
||||
"cgroups" : {
|
||||
"parent" : "docker",
|
||||
"name" : "11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620"
|
||||
},
|
||||
"tty" : true,
|
||||
"environment" : [
|
||||
"HOME=/",
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
"HOSTNAME=11bb30683fb0",
|
||||
"TERM=xterm"
|
||||
],
|
||||
"capabilities_mask" : [
|
||||
"SETPCAP",
|
||||
"SYS_MODULE",
|
||||
"SYS_RAWIO",
|
||||
"SYS_PACCT",
|
||||
"SYS_ADMIN",
|
||||
"SYS_NICE",
|
||||
"SYS_RESOURCE",
|
||||
"SYS_TIME",
|
||||
"SYS_TTY_CONFIG",
|
||||
"MKNOD",
|
||||
"AUDIT_WRITE",
|
||||
"AUDIT_CONTROL",
|
||||
"MAC_OVERRIDE",
|
||||
"MAC_ADMIN",
|
||||
"NET_ADMIN"
|
||||
],
|
||||
"context" : {
|
||||
"apparmor_profile" : "docker-default"
|
||||
},
|
||||
"mounts" : [
|
||||
{
|
||||
"source" : "/var/lib/docker/containers/11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620/resolv.conf",
|
||||
"writable" : false,
|
||||
"destination" : "/etc/resolv.conf",
|
||||
"private" : true
|
||||
},
|
||||
{
|
||||
"source" : "/var/lib/docker/containers/11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620/hostname",
|
||||
"writable" : false,
|
||||
"destination" : "/etc/hostname",
|
||||
"private" : true
|
||||
},
|
||||
{
|
||||
"source" : "/var/lib/docker/containers/11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620/hosts",
|
||||
"writable" : false,
|
||||
"destination" : "/etc/hosts",
|
||||
"private" : true
|
||||
"gateway" : "172.17.42.1"
|
||||
}
|
||||
],
|
||||
"namespaces" : [
|
||||
"NEWNS",
|
||||
"NEWUTS",
|
||||
"NEWIPC",
|
||||
"NEWPID",
|
||||
"NEWNET"
|
||||
{
|
||||
"key" : "NEWNS",
|
||||
"value" : 131072,
|
||||
"enabled" : true,
|
||||
"file" : "mnt"
|
||||
},
|
||||
{
|
||||
"key" : "NEWUTS",
|
||||
"value" : 67108864,
|
||||
"enabled" : true,
|
||||
"file" : "uts"
|
||||
},
|
||||
{
|
||||
"enabled" : true,
|
||||
"file" : "ipc",
|
||||
"key" : "NEWIPC",
|
||||
"value" : 134217728
|
||||
},
|
||||
{
|
||||
"file" : "pid",
|
||||
"enabled" : true,
|
||||
"value" : 536870912,
|
||||
"key" : "NEWPID"
|
||||
},
|
||||
{
|
||||
"enabled" : true,
|
||||
"file" : "net",
|
||||
"key" : "NEWNET",
|
||||
"value" : 1073741824
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
|
60
pkg/libcontainer/console/console.go
Normal file
60
pkg/libcontainer/console/console.go
Normal file
|
@ -0,0 +1,60 @@
|
|||
// +build linux
|
||||
|
||||
package console
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// Setup initializes the proper /dev/console inside the rootfs path
|
||||
func Setup(rootfs, consolePath, mountLabel string) error {
|
||||
oldMask := system.Umask(0000)
|
||||
defer system.Umask(oldMask)
|
||||
|
||||
stat, err := os.Stat(consolePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("stat console %s %s", consolePath, err)
|
||||
}
|
||||
var (
|
||||
st = stat.Sys().(*syscall.Stat_t)
|
||||
dest = filepath.Join(rootfs, "dev/console")
|
||||
)
|
||||
if err := os.Remove(dest); err != nil && !os.IsNotExist(err) {
|
||||
return fmt.Errorf("remove %s %s", dest, err)
|
||||
}
|
||||
if err := os.Chmod(consolePath, 0600); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Chown(consolePath, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil {
|
||||
return fmt.Errorf("mknod %s %s", dest, err)
|
||||
}
|
||||
if err := label.SetFileLabel(consolePath, mountLabel); err != nil {
|
||||
return fmt.Errorf("set file label %s %s", dest, err)
|
||||
}
|
||||
if err := system.Mount(consolePath, dest, "bind", syscall.MS_BIND, ""); err != nil {
|
||||
return fmt.Errorf("bind %s to %s %s", consolePath, dest, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func OpenAndDup(consolePath string) error {
|
||||
slave, err := system.OpenTerminal(consolePath, syscall.O_RDWR)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open terminal %s", err)
|
||||
}
|
||||
if err := system.Dup2(slave.Fd(), 0); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Dup2(slave.Fd(), 1); err != nil {
|
||||
return err
|
||||
}
|
||||
return system.Dup2(slave.Fd(), 2)
|
||||
}
|
|
@ -23,7 +23,7 @@ type Container struct {
|
|||
Networks []*Network `json:"networks,omitempty"` // nil for host's network stack
|
||||
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups
|
||||
Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux)
|
||||
Mounts []Mount `json:"mounts,omitempty"`
|
||||
Mounts Mounts `json:"mounts,omitempty"`
|
||||
}
|
||||
|
||||
// Network defines configuration for a container's networking stack
|
||||
|
@ -37,12 +37,3 @@ type Network struct {
|
|||
Gateway string `json:"gateway,omitempty"`
|
||||
Mtu int `json:"mtu,omitempty"`
|
||||
}
|
||||
|
||||
// Bind mounts from the host system to the container
|
||||
//
|
||||
type Mount struct {
|
||||
Source string `json:"source"` // Source path, in the host namespace
|
||||
Destination string `json:"destination"` // Destination path, in the container
|
||||
Writable bool `json:"writable"`
|
||||
Private bool `json:"private"`
|
||||
}
|
||||
|
|
|
@ -1,50 +1,146 @@
|
|||
{
|
||||
"hostname": "koye",
|
||||
"tty": true,
|
||||
"environment": [
|
||||
"HOME=/",
|
||||
"PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin",
|
||||
"container=docker",
|
||||
"TERM=xterm-256color"
|
||||
],
|
||||
"namespaces": [
|
||||
"NEWIPC",
|
||||
"NEWNS",
|
||||
"NEWPID",
|
||||
"NEWUTS",
|
||||
"NEWNET"
|
||||
],
|
||||
"capabilities_mask": [
|
||||
"SETPCAP",
|
||||
"SYS_MODULE",
|
||||
"SYS_RAWIO",
|
||||
"SYS_PACCT",
|
||||
"SYS_ADMIN",
|
||||
"SYS_NICE",
|
||||
"SYS_RESOURCE",
|
||||
"SYS_TIME",
|
||||
"SYS_TTY_CONFIG",
|
||||
"MKNOD",
|
||||
"AUDIT_WRITE",
|
||||
"AUDIT_CONTROL",
|
||||
"MAC_OVERRIDE",
|
||||
"MAC_ADMIN",
|
||||
"NET_ADMIN"
|
||||
],
|
||||
"networks": [{
|
||||
"type": "veth",
|
||||
"context": {
|
||||
"bridge": "docker0",
|
||||
"prefix": "dock"
|
||||
},
|
||||
"address": "172.17.0.100/16",
|
||||
"gateway": "172.17.42.1",
|
||||
"mtu": 1500
|
||||
}
|
||||
],
|
||||
"cgroups": {
|
||||
"name": "docker-koye",
|
||||
"parent": "docker",
|
||||
"memory": 5248000
|
||||
}
|
||||
"mounts" : [
|
||||
{
|
||||
"type" : "devtmpfs"
|
||||
}
|
||||
],
|
||||
"tty" : true,
|
||||
"environment" : [
|
||||
"HOME=/",
|
||||
"PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin",
|
||||
"container=docker",
|
||||
"TERM=xterm-256color"
|
||||
],
|
||||
"hostname" : "koye",
|
||||
"cgroups" : {
|
||||
"parent" : "docker",
|
||||
"name" : "docker-koye"
|
||||
},
|
||||
"capabilities_mask" : [
|
||||
{
|
||||
"value" : 8,
|
||||
"key" : "SETPCAP",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"enabled" : false,
|
||||
"value" : 16,
|
||||
"key" : "SYS_MODULE"
|
||||
},
|
||||
{
|
||||
"value" : 17,
|
||||
"key" : "SYS_RAWIO",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"key" : "SYS_PACCT",
|
||||
"value" : 20,
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"value" : 21,
|
||||
"key" : "SYS_ADMIN",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"value" : 23,
|
||||
"key" : "SYS_NICE",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"value" : 24,
|
||||
"key" : "SYS_RESOURCE",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"key" : "SYS_TIME",
|
||||
"value" : 25,
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"enabled" : false,
|
||||
"value" : 26,
|
||||
"key" : "SYS_TTY_CONFIG"
|
||||
},
|
||||
{
|
||||
"key" : "AUDIT_WRITE",
|
||||
"value" : 29,
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"value" : 30,
|
||||
"key" : "AUDIT_CONTROL",
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"enabled" : false,
|
||||
"key" : "MAC_OVERRIDE",
|
||||
"value" : 32
|
||||
},
|
||||
{
|
||||
"enabled" : false,
|
||||
"key" : "MAC_ADMIN",
|
||||
"value" : 33
|
||||
},
|
||||
{
|
||||
"key" : "NET_ADMIN",
|
||||
"value" : 12,
|
||||
"enabled" : false
|
||||
},
|
||||
{
|
||||
"value" : 27,
|
||||
"key" : "MKNOD",
|
||||
"enabled" : true
|
||||
}
|
||||
],
|
||||
"networks" : [
|
||||
{
|
||||
"mtu" : 1500,
|
||||
"address" : "127.0.0.1/0",
|
||||
"type" : "loopback",
|
||||
"gateway" : "localhost"
|
||||
},
|
||||
{
|
||||
"mtu" : 1500,
|
||||
"address" : "172.17.42.2/16",
|
||||
"type" : "veth",
|
||||
"context" : {
|
||||
"bridge" : "docker0",
|
||||
"prefix" : "veth"
|
||||
},
|
||||
"gateway" : "172.17.42.1"
|
||||
}
|
||||
],
|
||||
"namespaces" : [
|
||||
{
|
||||
"key" : "NEWNS",
|
||||
"value" : 131072,
|
||||
"enabled" : true,
|
||||
"file" : "mnt"
|
||||
},
|
||||
{
|
||||
"key" : "NEWUTS",
|
||||
"value" : 67108864,
|
||||
"enabled" : true,
|
||||
"file" : "uts"
|
||||
},
|
||||
{
|
||||
"enabled" : true,
|
||||
"file" : "ipc",
|
||||
"key" : "NEWIPC",
|
||||
"value" : 134217728
|
||||
},
|
||||
{
|
||||
"file" : "pid",
|
||||
"enabled" : true,
|
||||
"value" : 536870912,
|
||||
"key" : "NEWPID"
|
||||
},
|
||||
{
|
||||
"enabled" : true,
|
||||
"file" : "net",
|
||||
"key" : "NEWNET",
|
||||
"value" : 1073741824
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
143
pkg/libcontainer/mount/init.go
Normal file
143
pkg/libcontainer/mount/init.go
Normal file
|
@ -0,0 +1,143 @@
|
|||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// default mount point flags
|
||||
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||
|
||||
type mount struct {
|
||||
source string
|
||||
path string
|
||||
device string
|
||||
flags int
|
||||
data string
|
||||
}
|
||||
|
||||
// InitializeMountNamespace setups up the devices, mount points, and filesystems for use inside a
|
||||
// new mount namepsace
|
||||
func InitializeMountNamespace(rootfs, console string, container *libcontainer.Container) error {
|
||||
var (
|
||||
err error
|
||||
flag = syscall.MS_PRIVATE
|
||||
)
|
||||
if container.NoPivotRoot {
|
||||
flag = syscall.MS_SLAVE
|
||||
}
|
||||
if err := system.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil {
|
||||
return fmt.Errorf("mounting / as slave %s", err)
|
||||
}
|
||||
if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
|
||||
return fmt.Errorf("mouting %s as bind %s", rootfs, err)
|
||||
}
|
||||
if err := mountSystem(rootfs, container); err != nil {
|
||||
return fmt.Errorf("mount system %s", err)
|
||||
}
|
||||
if err := setupBindmounts(rootfs, container.Mounts); err != nil {
|
||||
return fmt.Errorf("bind mounts %s", err)
|
||||
}
|
||||
if err := nodes.CopyN(rootfs, nodes.DefaultNodes); err != nil {
|
||||
return fmt.Errorf("copy dev nodes %s", err)
|
||||
}
|
||||
if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
|
||||
if err := restrict.Restrict(rootfs, restrictionPath); err != nil {
|
||||
return fmt.Errorf("restrict %s", err)
|
||||
}
|
||||
}
|
||||
if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Chdir(rootfs); err != nil {
|
||||
return fmt.Errorf("chdir into %s %s", rootfs, err)
|
||||
}
|
||||
|
||||
if container.NoPivotRoot {
|
||||
err = MsMoveRoot(rootfs)
|
||||
} else {
|
||||
err = PivotRoot(rootfs)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if container.ReadonlyFs {
|
||||
if err := SetReadonly(); err != nil {
|
||||
return fmt.Errorf("set readonly %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
system.Umask(0022)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts
|
||||
// inside the mount namespace
|
||||
func mountSystem(rootfs string, container *libcontainer.Container) error {
|
||||
for _, m := range newSystemMounts(rootfs, container.Context["mount_label"], container.Mounts) {
|
||||
if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("mkdirall %s %s", m.path, err)
|
||||
}
|
||||
if err := system.Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil {
|
||||
return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupBindmounts(rootfs string, bindMounts libcontainer.Mounts) error {
|
||||
for _, m := range bindMounts.OfType("bind") {
|
||||
var (
|
||||
flags = syscall.MS_BIND | syscall.MS_REC
|
||||
dest = filepath.Join(rootfs, m.Destination)
|
||||
)
|
||||
if !m.Writable {
|
||||
flags = flags | syscall.MS_RDONLY
|
||||
}
|
||||
if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
|
||||
return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
|
||||
}
|
||||
if !m.Writable {
|
||||
if err := system.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
|
||||
return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err)
|
||||
}
|
||||
}
|
||||
if m.Private {
|
||||
if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
|
||||
return fmt.Errorf("mounting %s private %s", dest, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: this is crappy right now and should be cleaned up with a better way of handling system and
|
||||
// standard bind mounts allowing them to be more dymanic
|
||||
func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount {
|
||||
systemMounts := []mount{
|
||||
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
|
||||
}
|
||||
|
||||
if len(mounts.OfType("devtmpfs")) == 1 {
|
||||
systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: "mode=755"})
|
||||
}
|
||||
systemMounts = append(systemMounts,
|
||||
mount{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
|
||||
mount{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)})
|
||||
|
||||
if len(mounts.OfType("sysfs")) == 1 {
|
||||
systemMounts = append(systemMounts, mount{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags})
|
||||
}
|
||||
return systemMounts
|
||||
}
|
19
pkg/libcontainer/mount/msmoveroot.go
Normal file
19
pkg/libcontainer/mount/msmoveroot.go
Normal file
|
@ -0,0 +1,19 @@
|
|||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
func MsMoveRoot(rootfs string) error {
|
||||
if err := system.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
|
||||
return fmt.Errorf("mount move %s into / %s", rootfs, err)
|
||||
}
|
||||
if err := system.Chroot("."); err != nil {
|
||||
return fmt.Errorf("chroot . %s", err)
|
||||
}
|
||||
return system.Chdir("/")
|
||||
}
|
49
pkg/libcontainer/mount/nodes/nodes.go
Normal file
49
pkg/libcontainer/mount/nodes/nodes.go
Normal file
|
@ -0,0 +1,49 @@
|
|||
// +build linux
|
||||
|
||||
package nodes
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// Default list of device nodes to copy
|
||||
var DefaultNodes = []string{
|
||||
"null",
|
||||
"zero",
|
||||
"full",
|
||||
"random",
|
||||
"urandom",
|
||||
"tty",
|
||||
}
|
||||
|
||||
// CopyN copies the device node from the host into the rootfs
|
||||
func CopyN(rootfs string, nodesToCopy []string) error {
|
||||
oldMask := system.Umask(0000)
|
||||
defer system.Umask(oldMask)
|
||||
|
||||
for _, node := range nodesToCopy {
|
||||
if err := Copy(rootfs, node); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func Copy(rootfs, node string) error {
|
||||
stat, err := os.Stat(filepath.Join("/dev", node))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var (
|
||||
dest = filepath.Join(rootfs, "dev", node)
|
||||
st = stat.Sys().(*syscall.Stat_t)
|
||||
)
|
||||
if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("copy %s %s", node, err)
|
||||
}
|
||||
return nil
|
||||
}
|
31
pkg/libcontainer/mount/pivotroot.go
Normal file
31
pkg/libcontainer/mount/pivotroot.go
Normal file
|
@ -0,0 +1,31 @@
|
|||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
func PivotRoot(rootfs string) error {
|
||||
pivotDir, err := ioutil.TempDir(rootfs, ".pivot_root")
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create pivot_root dir %s", pivotDir, err)
|
||||
}
|
||||
if err := system.Pivotroot(rootfs, pivotDir); err != nil {
|
||||
return fmt.Errorf("pivot_root %s", err)
|
||||
}
|
||||
if err := system.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("chdir / %s", err)
|
||||
}
|
||||
// path to pivot dir now changed, update
|
||||
pivotDir = filepath.Join("/", filepath.Base(pivotDir))
|
||||
if err := system.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
|
||||
return fmt.Errorf("unmount pivot_root dir %s", err)
|
||||
}
|
||||
return os.Remove(pivotDir)
|
||||
}
|
26
pkg/libcontainer/mount/ptmx.go
Normal file
26
pkg/libcontainer/mount/ptmx.go
Normal file
|
@ -0,0 +1,26 @@
|
|||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/console"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
func SetupPtmx(rootfs, consolePath, mountLabel string) error {
|
||||
ptmx := filepath.Join(rootfs, "dev/ptmx")
|
||||
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
|
||||
return fmt.Errorf("symlink dev ptmx %s", err)
|
||||
}
|
||||
if consolePath != "" {
|
||||
if err := console.Setup(rootfs, consolePath, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
12
pkg/libcontainer/mount/readonly.go
Normal file
12
pkg/libcontainer/mount/readonly.go
Normal file
|
@ -0,0 +1,12 @@
|
|||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
func SetReadonly() error {
|
||||
return system.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
|
||||
}
|
31
pkg/libcontainer/mount/remount.go
Normal file
31
pkg/libcontainer/mount/remount.go
Normal file
|
@ -0,0 +1,31 @@
|
|||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
func RemountProc() error {
|
||||
if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func RemountSys() error {
|
||||
if err := system.Unmount("/sys", syscall.MNT_DETACH); err != nil {
|
||||
if err != syscall.EINVAL {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -6,6 +6,7 @@ import (
|
|||
"fmt"
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/mount"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
@ -63,10 +64,10 @@ func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []s
|
|||
if err := system.Unshare(syscall.CLONE_NEWNS); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
if err := remountProc(); err != nil {
|
||||
if err := mount.RemountProc(); err != nil {
|
||||
return -1, fmt.Errorf("remount proc %s", err)
|
||||
}
|
||||
if err := remountSys(); err != nil {
|
||||
if err := mount.RemountSys(); err != nil {
|
||||
return -1, fmt.Errorf("remount sys %s", err)
|
||||
}
|
||||
goto dropAndExec
|
||||
|
|
|
@ -11,8 +11,10 @@ import (
|
|||
"github.com/dotcloud/docker/pkg/apparmor"
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/capabilities"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/console"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/mount"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/network"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/security/capabilities"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/utils"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"github.com/dotcloud/docker/pkg/user"
|
||||
|
@ -20,7 +22,7 @@ import (
|
|||
|
||||
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
|
||||
// and other options required for the new container.
|
||||
func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error {
|
||||
func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error {
|
||||
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -36,20 +38,16 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
|
|||
ns.logger.Println("received context from parent")
|
||||
syncPipe.Close()
|
||||
|
||||
if console != "" {
|
||||
ns.logger.Printf("setting up %s as console\n", console)
|
||||
slave, err := system.OpenTerminal(console, syscall.O_RDWR)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open terminal %s", err)
|
||||
}
|
||||
if err := dupSlave(slave); err != nil {
|
||||
return fmt.Errorf("dup2 slave %s", err)
|
||||
if consolePath != "" {
|
||||
ns.logger.Printf("setting up %s as console\n", consolePath)
|
||||
if err := console.OpenAndDup(consolePath); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := system.Setsid(); err != nil {
|
||||
return fmt.Errorf("setsid %s", err)
|
||||
}
|
||||
if console != "" {
|
||||
if consolePath != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return fmt.Errorf("setctty %s", err)
|
||||
}
|
||||
|
@ -60,7 +58,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
|
|||
|
||||
label.Init()
|
||||
ns.logger.Println("setup mount namespace")
|
||||
if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot, container.Context["mount_label"]); err != nil {
|
||||
if err := mount.InitializeMountNamespace(rootfs, consolePath, container); err != nil {
|
||||
return fmt.Errorf("setup mount namespace %s", err)
|
||||
}
|
||||
if err := system.Sethostname(container.Hostname); err != nil {
|
||||
|
@ -114,21 +112,6 @@ func setupUser(container *libcontainer.Container) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// dupSlave dup2 the pty slave's fd into stdout and stdin and ensures that
|
||||
// the slave's fd is 0, or stdin
|
||||
func dupSlave(slave *os.File) error {
|
||||
if err := system.Dup2(slave.Fd(), 0); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Dup2(slave.Fd(), 1); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Dup2(slave.Fd(), 2); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupVethNetwork uses the Network config if it is not nil to initialize
|
||||
// the new veth interface inside the container for use by changing the name to eth0
|
||||
// setting the MTU and IP address along with the default gateway
|
||||
|
|
|
@ -1,265 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package nsinit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// default mount point flags
|
||||
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||
|
||||
// setupNewMountNamespace is used to initialize a new mount namespace for an new
|
||||
// container in the rootfs that is specified.
|
||||
//
|
||||
// There is no need to unmount the new mounts because as soon as the mount namespace
|
||||
// is no longer in use, the mounts will be removed automatically
|
||||
func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, console string, readonly, noPivotRoot bool, mountLabel string) error {
|
||||
flag := syscall.MS_PRIVATE
|
||||
if noPivotRoot {
|
||||
flag = syscall.MS_SLAVE
|
||||
}
|
||||
if err := system.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil {
|
||||
return fmt.Errorf("mounting / as slave %s", err)
|
||||
}
|
||||
if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
|
||||
return fmt.Errorf("mouting %s as bind %s", rootfs, err)
|
||||
}
|
||||
if err := mountSystem(rootfs, mountLabel); err != nil {
|
||||
return fmt.Errorf("mount system %s", err)
|
||||
}
|
||||
|
||||
for _, m := range bindMounts {
|
||||
var (
|
||||
flags = syscall.MS_BIND | syscall.MS_REC
|
||||
dest = filepath.Join(rootfs, m.Destination)
|
||||
)
|
||||
if !m.Writable {
|
||||
flags = flags | syscall.MS_RDONLY
|
||||
}
|
||||
if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
|
||||
return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
|
||||
}
|
||||
if !m.Writable {
|
||||
if err := system.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
|
||||
return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err)
|
||||
}
|
||||
}
|
||||
if m.Private {
|
||||
if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
|
||||
return fmt.Errorf("mounting %s private %s", dest, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := copyDevNodes(rootfs); err != nil {
|
||||
return fmt.Errorf("copy dev nodes %s", err)
|
||||
}
|
||||
if err := setupPtmx(rootfs, console, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Chdir(rootfs); err != nil {
|
||||
return fmt.Errorf("chdir into %s %s", rootfs, err)
|
||||
}
|
||||
|
||||
if noPivotRoot {
|
||||
if err := rootMsMove(rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := rootPivot(rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if readonly {
|
||||
if err := system.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil {
|
||||
return fmt.Errorf("mounting %s as readonly %s", rootfs, err)
|
||||
}
|
||||
}
|
||||
|
||||
system.Umask(0022)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// use a pivot root to setup the rootfs
|
||||
func rootPivot(rootfs string) error {
|
||||
pivotDir, err := ioutil.TempDir(rootfs, ".pivot_root")
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create pivot_root dir %s", pivotDir, err)
|
||||
}
|
||||
if err := system.Pivotroot(rootfs, pivotDir); err != nil {
|
||||
return fmt.Errorf("pivot_root %s", err)
|
||||
}
|
||||
if err := system.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("chdir / %s", err)
|
||||
}
|
||||
// path to pivot dir now changed, update
|
||||
pivotDir = filepath.Join("/", filepath.Base(pivotDir))
|
||||
if err := system.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
|
||||
return fmt.Errorf("unmount pivot_root dir %s", err)
|
||||
}
|
||||
if err := os.Remove(pivotDir); err != nil {
|
||||
return fmt.Errorf("remove pivot_root dir %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// use MS_MOVE and chroot to setup the rootfs
|
||||
func rootMsMove(rootfs string) error {
|
||||
if err := system.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
|
||||
return fmt.Errorf("mount move %s into / %s", rootfs, err)
|
||||
}
|
||||
if err := system.Chroot("."); err != nil {
|
||||
return fmt.Errorf("chroot . %s", err)
|
||||
}
|
||||
if err := system.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("chdir / %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// copyDevNodes mknods the hosts devices so the new container has access to them
|
||||
func copyDevNodes(rootfs string) error {
|
||||
oldMask := system.Umask(0000)
|
||||
defer system.Umask(oldMask)
|
||||
|
||||
for _, node := range []string{
|
||||
"null",
|
||||
"zero",
|
||||
"full",
|
||||
"random",
|
||||
"urandom",
|
||||
"tty",
|
||||
} {
|
||||
if err := copyDevNode(rootfs, node); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func copyDevNode(rootfs, node string) error {
|
||||
stat, err := os.Stat(filepath.Join("/dev", node))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var (
|
||||
dest = filepath.Join(rootfs, "dev", node)
|
||||
st = stat.Sys().(*syscall.Stat_t)
|
||||
)
|
||||
if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("copy %s %s", node, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupConsole ensures that the container has a proper /dev/console setup
|
||||
func setupConsole(rootfs, console string, mountLabel string) error {
|
||||
oldMask := system.Umask(0000)
|
||||
defer system.Umask(oldMask)
|
||||
|
||||
stat, err := os.Stat(console)
|
||||
if err != nil {
|
||||
return fmt.Errorf("stat console %s %s", console, err)
|
||||
}
|
||||
var (
|
||||
st = stat.Sys().(*syscall.Stat_t)
|
||||
dest = filepath.Join(rootfs, "dev/console")
|
||||
)
|
||||
if err := os.Remove(dest); err != nil && !os.IsNotExist(err) {
|
||||
return fmt.Errorf("remove %s %s", dest, err)
|
||||
}
|
||||
if err := os.Chmod(console, 0600); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Chown(console, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil {
|
||||
return fmt.Errorf("mknod %s %s", dest, err)
|
||||
}
|
||||
if err := label.SetFileLabel(console, mountLabel); err != nil {
|
||||
return fmt.Errorf("SetFileLabel Failed %s %s", dest, err)
|
||||
}
|
||||
if err := system.Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil {
|
||||
return fmt.Errorf("bind %s to %s %s", console, dest, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts
|
||||
// inside the mount namespace
|
||||
func mountSystem(rootfs string, mountLabel string) error {
|
||||
for _, m := range []struct {
|
||||
source string
|
||||
path string
|
||||
device string
|
||||
flags int
|
||||
data string
|
||||
}{
|
||||
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
|
||||
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
|
||||
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
|
||||
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
|
||||
} {
|
||||
if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("mkdirall %s %s", m.path, err)
|
||||
}
|
||||
if err := system.Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil {
|
||||
return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupPtmx adds a symlink to pts/ptmx for /dev/ptmx and
|
||||
// finishes setting up /dev/console
|
||||
func setupPtmx(rootfs, console string, mountLabel string) error {
|
||||
ptmx := filepath.Join(rootfs, "dev/ptmx")
|
||||
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
|
||||
return fmt.Errorf("symlink dev ptmx %s", err)
|
||||
}
|
||||
if console != "" {
|
||||
if err := setupConsole(rootfs, console, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// remountProc is used to detach and remount the proc filesystem
|
||||
// commonly needed with running a new process inside an existing container
|
||||
func remountProc() error {
|
||||
if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func remountSys() error {
|
||||
if err := system.Unmount("/sys", syscall.MNT_DETACH); err != nil {
|
||||
if err != syscall.EINVAL {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
51
pkg/libcontainer/security/restrict/restrict.go
Normal file
51
pkg/libcontainer/security/restrict/restrict.go
Normal file
|
@ -0,0 +1,51 @@
|
|||
package restrict
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
)
|
||||
|
||||
const flags = syscall.MS_BIND | syscall.MS_REC | syscall.MS_RDONLY
|
||||
|
||||
var restrictions = map[string]string{
|
||||
// dirs
|
||||
"/proc/sys": "",
|
||||
"/proc/irq": "",
|
||||
"/proc/acpi": "",
|
||||
|
||||
// files
|
||||
"/proc/sysrq-trigger": "/dev/null",
|
||||
"/proc/kcore": "/dev/null",
|
||||
}
|
||||
|
||||
// Restrict locks down access to many areas of proc
|
||||
// by using the asumption that the user does not have mount caps to
|
||||
// revert the changes made here
|
||||
func Restrict(rootfs, empty string) error {
|
||||
for dest, source := range restrictions {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
|
||||
// we don't have a "/dev/null" for dirs so have the requester pass a dir
|
||||
// for us to bind mount
|
||||
switch source {
|
||||
case "":
|
||||
source = empty
|
||||
default:
|
||||
source = filepath.Join(rootfs, source)
|
||||
}
|
||||
if err := system.Mount(source, dest, "bind", flags, ""); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
|
||||
}
|
||||
if err := system.Mount("", dest, "bind", flags|syscall.MS_REMOUNT, ""); err != nil {
|
||||
return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -11,6 +11,26 @@ var (
|
|||
ErrUnsupported = errors.New("Unsupported method")
|
||||
)
|
||||
|
||||
type Mounts []Mount
|
||||
|
||||
func (s Mounts) OfType(t string) Mounts {
|
||||
out := Mounts{}
|
||||
for _, m := range s {
|
||||
if m.Type == t {
|
||||
out = append(out, m)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
type Mount struct {
|
||||
Type string `json:"type,omitempty"`
|
||||
Source string `json:"source,omitempty"` // Source path, in the host namespace
|
||||
Destination string `json:"destination,omitempty"` // Destination path, in the container
|
||||
Writable bool `json:"writable,omitempty"`
|
||||
Private bool `json:"private,omitempty"`
|
||||
}
|
||||
|
||||
// namespaceList is used to convert the libcontainer types
|
||||
// into the names of the files located in /proc/<pid>/ns/* for
|
||||
// each namespace
|
||||
|
|
Loading…
Reference in a new issue