mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Move all bind-mounts in the container inside the namespace
This moves the bind mounts like /.dockerinit, /etc/hostname, volumes, etc into the container namespace, by setting them up using lxc. This is useful to avoid littering the global namespace with a lot of mounts that are internal to each container and are not generally needed on the outside. In particular, it seems that having a lot of mounts is problematic wrt scaling to a lot of containers on systems where the root filesystem is mounted --rshared. Note that the "private" option is only supported by the native driver, as lxc doesn't support setting this. This is not a huge problem, but it does mean that some mounts are unnecessarily shared inside the container if you're using the lxc driver. Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)
This commit is contained in:
parent
636959e20a
commit
6c266c4b42
11 changed files with 78 additions and 64 deletions
|
@ -97,6 +97,13 @@ type Resources struct {
|
||||||
CpuShares int64 `json:"cpu_shares"`
|
CpuShares int64 `json:"cpu_shares"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type Mount struct {
|
||||||
|
Source string `json:"source"`
|
||||||
|
Destination string `json:"destination"`
|
||||||
|
Writable bool `json:"writable"`
|
||||||
|
Private bool `json:"private"`
|
||||||
|
}
|
||||||
|
|
||||||
// Command wraps an os/exec.Cmd to add more metadata
|
// Command wraps an os/exec.Cmd to add more metadata
|
||||||
type Command struct {
|
type Command struct {
|
||||||
exec.Cmd `json:"-"`
|
exec.Cmd `json:"-"`
|
||||||
|
@ -114,6 +121,7 @@ type Command struct {
|
||||||
Network *Network `json:"network"` // if network is nil then networking is disabled
|
Network *Network `json:"network"` // if network is nil then networking is disabled
|
||||||
Config []string `json:"config"` // generic values that specific drivers can consume
|
Config []string `json:"config"` // generic values that specific drivers can consume
|
||||||
Resources *Resources `json:"resources"`
|
Resources *Resources `json:"resources"`
|
||||||
|
Mounts []Mount `json:"mounts"`
|
||||||
|
|
||||||
Terminal Terminal `json:"-"` // standard or tty terminal
|
Terminal Terminal `json:"-"` // standard or tty terminal
|
||||||
Console string `json:"-"` // dev/console path
|
Console string `json:"-"` // dev/console path
|
||||||
|
|
|
@ -9,7 +9,7 @@ import (
|
||||||
"path"
|
"path"
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewDriver(name, root string, sysInfo *sysinfo.SysInfo) (execdriver.Driver, error) {
|
func NewDriver(name, root, initPath string, sysInfo *sysinfo.SysInfo) (execdriver.Driver, error) {
|
||||||
switch name {
|
switch name {
|
||||||
case "lxc":
|
case "lxc":
|
||||||
// we want to give the lxc driver the full docker root because it needs
|
// we want to give the lxc driver the full docker root because it needs
|
||||||
|
@ -17,7 +17,7 @@ func NewDriver(name, root string, sysInfo *sysinfo.SysInfo) (execdriver.Driver,
|
||||||
// to be backwards compatible
|
// to be backwards compatible
|
||||||
return lxc.NewDriver(root, sysInfo.AppArmor)
|
return lxc.NewDriver(root, sysInfo.AppArmor)
|
||||||
case "native":
|
case "native":
|
||||||
return native.NewDriver(path.Join(root, "execdriver", "native"))
|
return native.NewDriver(path.Join(root, "execdriver", "native"), initPath)
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("unknown exec driver %s", name)
|
return nil, fmt.Errorf("unknown exec driver %s", name)
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,6 +88,14 @@ lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bi
|
||||||
lxc.mount.entry = devpts {{escapeFstabSpaces $ROOTFS}}/dev/pts devpts newinstance,ptmxmode=0666,nosuid,noexec 0 0
|
lxc.mount.entry = devpts {{escapeFstabSpaces $ROOTFS}}/dev/pts devpts newinstance,ptmxmode=0666,nosuid,noexec 0 0
|
||||||
lxc.mount.entry = shm {{escapeFstabSpaces $ROOTFS}}/dev/shm tmpfs size=65536k,nosuid,nodev,noexec 0 0
|
lxc.mount.entry = shm {{escapeFstabSpaces $ROOTFS}}/dev/shm tmpfs size=65536k,nosuid,nodev,noexec 0 0
|
||||||
|
|
||||||
|
{{range $value := .Mounts}}
|
||||||
|
{{if $value.Writable}}
|
||||||
|
lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabSpaces $value.Destination}} none bind,rw 0 0
|
||||||
|
{{else}}
|
||||||
|
lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabSpaces $value.Destination}} none bind,ro 0 0
|
||||||
|
{{end}}
|
||||||
|
{{end}}
|
||||||
|
|
||||||
{{if .Privileged}}
|
{{if .Privileged}}
|
||||||
{{if .AppArmor}}
|
{{if .AppArmor}}
|
||||||
lxc.aa_profile = unconfined
|
lxc.aa_profile = unconfined
|
||||||
|
|
|
@ -48,6 +48,10 @@ func createContainer(c *execdriver.Command) *libcontainer.Container {
|
||||||
// check to see if we are running in ramdisk to disable pivot root
|
// check to see if we are running in ramdisk to disable pivot root
|
||||||
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
|
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
|
||||||
|
|
||||||
|
for _, m := range c.Mounts {
|
||||||
|
container.Mounts = append(container.Mounts, libcontainer.Mount{m.Source, m.Destination, m.Writable, m.Private})
|
||||||
|
}
|
||||||
|
|
||||||
return container
|
return container
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -55,10 +55,11 @@ func init() {
|
||||||
}
|
}
|
||||||
|
|
||||||
type driver struct {
|
type driver struct {
|
||||||
root string
|
root string
|
||||||
|
initPath string
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewDriver(root string) (*driver, error) {
|
func NewDriver(root, initPath string) (*driver, error) {
|
||||||
if err := os.MkdirAll(root, 0700); err != nil {
|
if err := os.MkdirAll(root, 0700); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -66,7 +67,8 @@ func NewDriver(root string) (*driver, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return &driver{
|
return &driver{
|
||||||
root: root,
|
root: root,
|
||||||
|
initPath: initPath,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -210,7 +212,7 @@ func (d *dockerCommandFactory) Create(container *libcontainer.Container, console
|
||||||
// we need to join the rootfs because nsinit will setup the rootfs and chroot
|
// we need to join the rootfs because nsinit will setup the rootfs and chroot
|
||||||
initPath := filepath.Join(d.c.Rootfs, d.c.InitPath)
|
initPath := filepath.Join(d.c.Rootfs, d.c.InitPath)
|
||||||
|
|
||||||
d.c.Path = initPath
|
d.c.Path = d.driver.initPath
|
||||||
d.c.Args = append([]string{
|
d.c.Args = append([]string{
|
||||||
initPath,
|
initPath,
|
||||||
"-driver", DriverName,
|
"-driver", DriverName,
|
||||||
|
|
|
@ -23,6 +23,7 @@ type Container struct {
|
||||||
Networks []*Network `json:"networks,omitempty"` // nil for host's network stack
|
Networks []*Network `json:"networks,omitempty"` // nil for host's network stack
|
||||||
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups
|
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups
|
||||||
Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux)
|
Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux)
|
||||||
|
Mounts []Mount `json:"mounts,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Network defines configuration for a container's networking stack
|
// Network defines configuration for a container's networking stack
|
||||||
|
@ -36,3 +37,12 @@ type Network struct {
|
||||||
Gateway string `json:"gateway,omitempty"`
|
Gateway string `json:"gateway,omitempty"`
|
||||||
Mtu int `json:"mtu,omitempty"`
|
Mtu int `json:"mtu,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bind mounts from the host system to the container
|
||||||
|
//
|
||||||
|
type Mount struct {
|
||||||
|
Source string `json:"source"` // Source path, in the host namespace
|
||||||
|
Destination string `json:"destination"` // Destination path, in the container
|
||||||
|
Writable bool `json:"writable"`
|
||||||
|
Private bool `json:"private"`
|
||||||
|
}
|
||||||
|
|
|
@ -51,7 +51,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
|
||||||
if err := system.ParentDeathSignal(); err != nil {
|
if err := system.ParentDeathSignal(); err != nil {
|
||||||
return fmt.Errorf("parent death signal %s", err)
|
return fmt.Errorf("parent death signal %s", err)
|
||||||
}
|
}
|
||||||
if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs, container.NoPivotRoot); err != nil {
|
if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot); err != nil {
|
||||||
return fmt.Errorf("setup mount namespace %s", err)
|
return fmt.Errorf("setup mount namespace %s", err)
|
||||||
}
|
}
|
||||||
if err := setupNetwork(container, context); err != nil {
|
if err := setupNetwork(container, context); err != nil {
|
||||||
|
|
|
@ -4,6 +4,7 @@ package nsinit
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||||
"github.com/dotcloud/docker/pkg/system"
|
"github.com/dotcloud/docker/pkg/system"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
|
@ -19,7 +20,7 @@ const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NOD
|
||||||
//
|
//
|
||||||
// There is no need to unmount the new mounts because as soon as the mount namespace
|
// There is no need to unmount the new mounts because as soon as the mount namespace
|
||||||
// is no longer in use, the mounts will be removed automatically
|
// is no longer in use, the mounts will be removed automatically
|
||||||
func setupNewMountNamespace(rootfs, console string, readonly, noPivotRoot bool) error {
|
func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, console string, readonly, noPivotRoot bool) error {
|
||||||
flag := syscall.MS_PRIVATE
|
flag := syscall.MS_PRIVATE
|
||||||
if noPivotRoot {
|
if noPivotRoot {
|
||||||
flag = syscall.MS_SLAVE
|
flag = syscall.MS_SLAVE
|
||||||
|
@ -38,6 +39,23 @@ func setupNewMountNamespace(rootfs, console string, readonly, noPivotRoot bool)
|
||||||
if err := mountSystem(rootfs); err != nil {
|
if err := mountSystem(rootfs); err != nil {
|
||||||
return fmt.Errorf("mount system %s", err)
|
return fmt.Errorf("mount system %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, m := range bindMounts {
|
||||||
|
flags := syscall.MS_BIND | syscall.MS_REC
|
||||||
|
if !m.Writable {
|
||||||
|
flags = flags | syscall.MS_RDONLY
|
||||||
|
}
|
||||||
|
dest := filepath.Join(rootfs, m.Destination)
|
||||||
|
if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
|
||||||
|
return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
|
||||||
|
}
|
||||||
|
if m.Private {
|
||||||
|
if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
|
||||||
|
return fmt.Errorf("mounting %s private %s", dest, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if err := copyDevNodes(rootfs); err != nil {
|
if err := copyDevNodes(rootfs); err != nil {
|
||||||
return fmt.Errorf("copy dev nodes %s", err)
|
return fmt.Errorf("copy dev nodes %s", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -529,13 +529,13 @@ func (container *Container) Start() (err error) {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
populateCommand(container)
|
||||||
|
container.command.Env = env
|
||||||
|
|
||||||
if err := mountVolumesForContainer(container, envPath); err != nil {
|
if err := mountVolumesForContainer(container, envPath); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
populateCommand(container)
|
|
||||||
container.command.Env = env
|
|
||||||
|
|
||||||
// Setup logging of stdout and stderr to disk
|
// Setup logging of stdout and stderr to disk
|
||||||
if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil {
|
if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
|
@ -733,7 +733,7 @@ func NewRuntimeFromDirectory(config *daemonconfig.Config, eng *engine.Engine) (*
|
||||||
}
|
}
|
||||||
|
|
||||||
sysInfo := sysinfo.New(false)
|
sysInfo := sysinfo.New(false)
|
||||||
ed, err := execdrivers.NewDriver(config.ExecDriver, config.Root, sysInfo)
|
ed, err := execdrivers.NewDriver(config.ExecDriver, config.Root, sysInitPath, sysInfo)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ package runtime
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/dotcloud/docker/archive"
|
"github.com/dotcloud/docker/archive"
|
||||||
|
"github.com/dotcloud/docker/execdriver"
|
||||||
"github.com/dotcloud/docker/pkg/mount"
|
"github.com/dotcloud/docker/pkg/mount"
|
||||||
"github.com/dotcloud/docker/utils"
|
"github.com/dotcloud/docker/utils"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
@ -55,70 +56,33 @@ func mountVolumesForContainer(container *Container, envPath string) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mount docker specific files into the containers root fs
|
mounts := []execdriver.Mount{
|
||||||
if err := mount.Mount(runtime.sysInitPath, filepath.Join(root, "/.dockerinit"), "none", "bind,ro"); err != nil {
|
{runtime.sysInitPath, "/.dockerinit", false, true},
|
||||||
return err
|
{envPath, "/.dockerenv", false, true},
|
||||||
}
|
{container.ResolvConfPath, "/etc/resolv.conf", false, true},
|
||||||
if err := mount.Mount(envPath, filepath.Join(root, "/.dockerenv"), "none", "bind,ro"); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := mount.Mount(container.ResolvConfPath, filepath.Join(root, "/etc/resolv.conf"), "none", "bind,ro"); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if container.HostnamePath != "" && container.HostsPath != "" {
|
if container.HostnamePath != "" && container.HostsPath != "" {
|
||||||
if err := mount.Mount(container.HostnamePath, filepath.Join(root, "/etc/hostname"), "none", "bind,ro"); err != nil {
|
mounts = append(mounts, execdriver.Mount{container.HostnamePath, "/etc/hostname", false, true})
|
||||||
return err
|
mounts = append(mounts, execdriver.Mount{container.HostsPath, "/etc/hosts", false, true})
|
||||||
}
|
|
||||||
if err := mount.Mount(container.HostsPath, filepath.Join(root, "/etc/hosts"), "none", "bind,ro"); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mount user specified volumes
|
// Mount user specified volumes
|
||||||
|
// Note, these are not private because you may want propagation of (un)mounts from host
|
||||||
|
// volumes. For instance if you use -v /usr:/usr and the host later mounts /usr/share you
|
||||||
|
// want this new mount in the container
|
||||||
for r, v := range container.Volumes {
|
for r, v := range container.Volumes {
|
||||||
mountAs := "ro"
|
mounts = append(mounts, execdriver.Mount{v, r, container.VolumesRW[r], false})
|
||||||
if container.VolumesRW[r] {
|
|
||||||
mountAs = "rw"
|
|
||||||
}
|
|
||||||
|
|
||||||
r = filepath.Join(root, r)
|
|
||||||
if p, err := utils.FollowSymlinkInScope(r, root); err != nil {
|
|
||||||
return err
|
|
||||||
} else {
|
|
||||||
r = p
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := mount.Mount(v, r, "none", fmt.Sprintf("bind,%s", mountAs)); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
container.command.Mounts = mounts
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func unmountVolumesForContainer(container *Container) {
|
func unmountVolumesForContainer(container *Container) {
|
||||||
var (
|
if err := mount.Unmount(container.RootfsPath()); err != nil {
|
||||||
root = container.RootfsPath()
|
log.Printf("Failed to umount container: %v", err)
|
||||||
mounts = []string{
|
|
||||||
root,
|
|
||||||
filepath.Join(root, "/.dockerinit"),
|
|
||||||
filepath.Join(root, "/.dockerenv"),
|
|
||||||
filepath.Join(root, "/etc/resolv.conf"),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
if container.HostnamePath != "" && container.HostsPath != "" {
|
|
||||||
mounts = append(mounts, filepath.Join(root, "/etc/hostname"), filepath.Join(root, "/etc/hosts"))
|
|
||||||
}
|
|
||||||
|
|
||||||
for r := range container.Volumes {
|
|
||||||
mounts = append(mounts, filepath.Join(root, r))
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := len(mounts) - 1; i >= 0; i-- {
|
|
||||||
if lastError := mount.Unmount(mounts[i]); lastError != nil {
|
|
||||||
log.Printf("Failed to umount %v: %v", mounts[i], lastError)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue