1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00
moby--moby/daemon/container_operations_unix.go
Vivek Goyal cacd400777 Mount volumes rprivate for archival and other use cases
People have reported following problem.

- docker run -ti --name=foo -v /dev/:/dev/ fedora bash
- docker cp foo:/bin/bash /tmp

Once the cp operation is complete, it unmounted /dev/pts on the host. /dev/pts
is a submount of /dev/. This is completely unexpected. Following is the
reson for this behavior.

containerArchivePath() call mountVolumes() which goes through all the mounts
points of a container and mounts them in daemon mount namespace in
/var/lib/docker/devicemapper/mnt/<containerid>/rootfs dir. And once we have
extracted the data required, these are unmounted using UnmountVolumes().

Mounts are done using recursive bind (rbind). And these are unmounted using
lazy mount option on top level mount. (detachMounted()). That means if there
are submounts under top level mounts, these mount events will propagate and
they were "shared" mounts with host, it will unmount the submount on host
as well.

For example, try following.

- Prepare a parent and child mount point.
  $ mkdir /root/foo
  $ mount --bind /root/foo /root/foo 
  $ mount --make-rshared /root/foo
  
- Prepare a child mount 

  $ mkdir /root/foo/foo1
  $ mount --bind /root/foo/foo1 /root/foo/foo1
 
- Bind mount foo at bar

  $ mkdir /root/bar
  $ mount --rbind /root/foo /root/bar
  
- Now lazy unmount /root/bar and it will unmount /root/foo/foo1 as well.

  $ umount -l /root/bar

This is not unintended. We just wanted to unmount /root/bar and anything
underneath but did not have intentions of unmounting anything on source.

So far this was not a problem as docker daemon was running in a seprate
mount namespace where all propagation was "slave". That means any unmounts
in docker daemon namespace did not propagate to host namespace. 

But now we are running docker daemon in host namespace so that it is possible
to mount some volumes "shared" with container. So that if container mounts
something it propagates to host namespace as well. 

Given mountVolumes() seems to be doing only temporary mounts to read some
data, there does not seem to be a need to mount these shared/slave. Just
mount these private so that on unmount, nothing propagates and does not
have unintended consequences. 

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
2016-04-15 14:03:11 +00:00

369 lines
11 KiB
Go

// +build linux freebsd
package daemon
import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/links"
"github.com/docker/docker/pkg/fileutils"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/stringid"
"github.com/docker/docker/runconfig"
containertypes "github.com/docker/engine-api/types/container"
networktypes "github.com/docker/engine-api/types/network"
"github.com/docker/libnetwork"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/specs/specs-go"
)
func u32Ptr(i int64) *uint32 { u := uint32(i); return &u }
func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }
func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
var env []string
children := daemon.children(container)
bridgeSettings := container.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
if bridgeSettings == nil {
return nil, nil
}
for linkAlias, child := range children {
if !child.IsRunning() {
return nil, fmt.Errorf("Cannot link to a non running container: %s AS %s", child.Name, linkAlias)
}
childBridgeSettings := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
if childBridgeSettings == nil {
return nil, fmt.Errorf("container %s not attached to default bridge network", child.ID)
}
link := links.NewLink(
bridgeSettings.IPAddress,
childBridgeSettings.IPAddress,
linkAlias,
child.Config.Env,
child.Config.ExposedPorts,
)
for _, envVar := range link.ToEnv() {
env = append(env, envVar)
}
}
return env, nil
}
// getSize returns the real size & virtual size of the container.
func (daemon *Daemon) getSize(container *container.Container) (int64, int64) {
var (
sizeRw, sizeRootfs int64
err error
)
if err := daemon.Mount(container); err != nil {
logrus.Errorf("Failed to compute size of container rootfs %s: %s", container.ID, err)
return sizeRw, sizeRootfs
}
defer daemon.Unmount(container)
sizeRw, err = container.RWLayer.Size()
if err != nil {
logrus.Errorf("Driver %s couldn't return diff size of container %s: %s",
daemon.GraphDriverName(), container.ID, err)
// FIXME: GetSize should return an error. Not changing it now in case
// there is a side-effect.
sizeRw = -1
}
if parent := container.RWLayer.Parent(); parent != nil {
sizeRootfs, err = parent.Size()
if err != nil {
sizeRootfs = -1
} else if sizeRw != -1 {
sizeRootfs += sizeRw
}
}
return sizeRw, sizeRootfs
}
// ConnectToNetwork connects a container to a network
func (daemon *Daemon) ConnectToNetwork(container *container.Container, idOrName string, endpointConfig *networktypes.EndpointSettings) error {
if !container.Running {
if container.RemovalInProgress || container.Dead {
return errRemovalContainer(container.ID)
}
if _, err := daemon.updateNetworkConfig(container, idOrName, endpointConfig, true); err != nil {
return err
}
if endpointConfig != nil {
container.NetworkSettings.Networks[idOrName] = endpointConfig
}
} else {
if err := daemon.connectToNetwork(container, idOrName, endpointConfig, true); err != nil {
return err
}
}
if err := container.ToDiskLocking(); err != nil {
return fmt.Errorf("Error saving container to disk: %v", err)
}
return nil
}
// DisconnectFromNetwork disconnects container from network n.
func (daemon *Daemon) DisconnectFromNetwork(container *container.Container, n libnetwork.Network, force bool) error {
if container.HostConfig.NetworkMode.IsHost() && containertypes.NetworkMode(n.Type()).IsHost() {
return runconfig.ErrConflictHostNetwork
}
if !container.Running {
if container.RemovalInProgress || container.Dead {
return errRemovalContainer(container.ID)
}
if _, ok := container.NetworkSettings.Networks[n.Name()]; ok {
delete(container.NetworkSettings.Networks, n.Name())
} else {
return fmt.Errorf("container %s is not connected to the network %s", container.ID, n.Name())
}
} else {
if err := disconnectFromNetwork(container, n, false); err != nil {
return err
}
}
if err := container.ToDiskLocking(); err != nil {
return fmt.Errorf("Error saving container to disk: %v", err)
}
attributes := map[string]string{
"container": container.ID,
}
daemon.LogNetworkEventWithAttributes(n, "disconnect", attributes)
return nil
}
func (daemon *Daemon) getIpcContainer(container *container.Container) (*container.Container, error) {
containerID := container.HostConfig.IpcMode.Container()
c, err := daemon.GetContainer(containerID)
if err != nil {
return nil, err
}
if !c.IsRunning() {
return nil, fmt.Errorf("cannot join IPC of a non running container: %s", containerID)
}
if c.IsRestarting() {
return nil, errContainerIsRestarting(container.ID)
}
return c, nil
}
func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
var err error
c.ShmPath, err = c.ShmResourcePath()
if err != nil {
return err
}
if c.HostConfig.IpcMode.IsContainer() {
ic, err := daemon.getIpcContainer(c)
if err != nil {
return err
}
c.ShmPath = ic.ShmPath
} else if c.HostConfig.IpcMode.IsHost() {
if _, err := os.Stat("/dev/shm"); err != nil {
return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
}
c.ShmPath = "/dev/shm"
} else {
rootUID, rootGID := daemon.GetRemappedUIDGID()
if !c.HasMountFor("/dev/shm") {
shmPath, err := c.ShmResourcePath()
if err != nil {
return err
}
if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
return err
}
shmSize := container.DefaultSHMSize
if c.HostConfig.ShmSize != 0 {
shmSize = c.HostConfig.ShmSize
}
shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10)
if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
return fmt.Errorf("mounting shm tmpfs: %s", err)
}
if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
return err
}
}
}
return nil
}
func (daemon *Daemon) mountVolumes(container *container.Container) error {
mounts, err := daemon.setupMounts(container)
if err != nil {
return err
}
for _, m := range mounts {
dest, err := container.GetResourcePath(m.Destination)
if err != nil {
return err
}
var stat os.FileInfo
stat, err = os.Stat(m.Source)
if err != nil {
return err
}
if err = fileutils.CreateIfNotExists(dest, stat.IsDir()); err != nil {
return err
}
opts := "rbind,ro"
if m.Writable {
opts = "rbind,rw"
}
if err := mount.Mount(m.Source, dest, "bind", opts); err != nil {
return err
}
// mountVolumes() seems to be called for temporary mounts
// outside the container. Soon these will be unmounted with
// lazy unmount option and given we have mounted the rbind,
// all the submounts will propagate if these are shared. If
// daemon is running in host namespace and has / as shared
// then these unmounts will propagate and unmount original
// mount as well. So make all these mounts rprivate.
// Do not use propagation property of volume as that should
// apply only when mounting happen inside the container.
if err := mount.MakeRPrivate(dest); err != nil {
return err
}
}
return nil
}
func killProcessDirectly(container *container.Container) error {
if _, err := container.WaitStop(10 * time.Second); err != nil {
// Ensure that we don't kill ourselves
if pid := container.GetPID(); pid != 0 {
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(container.ID))
if err := syscall.Kill(pid, 9); err != nil {
if err != syscall.ESRCH {
return err
}
e := errNoSuchProcess{pid, 9}
logrus.Debug(e)
return e
}
}
}
return nil
}
func specDevice(d *configs.Device) specs.Device {
return specs.Device{
Type: string(d.Type),
Path: d.Path,
Major: d.Major,
Minor: d.Minor,
FileMode: fmPtr(int64(d.FileMode)),
UID: u32Ptr(int64(d.Uid)),
GID: u32Ptr(int64(d.Gid)),
}
}
func specDeviceCgroup(d *configs.Device) specs.DeviceCgroup {
t := string(d.Type)
return specs.DeviceCgroup{
Allow: true,
Type: &t,
Major: &d.Major,
Minor: &d.Minor,
Access: &d.Permissions,
}
}
func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []specs.Device, devPermissions []specs.DeviceCgroup, err error) {
resolvedPathOnHost := deviceMapping.PathOnHost
// check if it is a symbolic link
if src, e := os.Lstat(deviceMapping.PathOnHost); e == nil && src.Mode()&os.ModeSymlink == os.ModeSymlink {
if linkedPathOnHost, e := os.Readlink(deviceMapping.PathOnHost); e == nil {
resolvedPathOnHost = linkedPathOnHost
}
}
device, err := devices.DeviceFromPath(resolvedPathOnHost, deviceMapping.CgroupPermissions)
// if there was no error, return the device
if err == nil {
device.Path = deviceMapping.PathInContainer
return append(devs, specDevice(device)), append(devPermissions, specDeviceCgroup(device)), nil
}
// if the device is not a device node
// try to see if it's a directory holding many devices
if err == devices.ErrNotADevice {
// check if it is a directory
if src, e := os.Stat(resolvedPathOnHost); e == nil && src.IsDir() {
// mount the internal devices recursively
filepath.Walk(resolvedPathOnHost, func(dpath string, f os.FileInfo, e error) error {
childDevice, e := devices.DeviceFromPath(dpath, deviceMapping.CgroupPermissions)
if e != nil {
// ignore the device
return nil
}
// add the device to userSpecified devices
childDevice.Path = strings.Replace(dpath, resolvedPathOnHost, deviceMapping.PathInContainer, 1)
devs = append(devs, specDevice(childDevice))
devPermissions = append(devPermissions, specDeviceCgroup(childDevice))
return nil
})
}
}
if len(devs) > 0 {
return devs, devPermissions, nil
}
return devs, devPermissions, fmt.Errorf("error gathering device information while adding custom device %q: %s", deviceMapping.PathOnHost, err)
}
func detachMounted(path string) error {
return syscall.Unmount(path, syscall.MNT_DETACH)
}
func isLinkable(child *container.Container) bool {
// A container is linkable only if it belongs to the default network
_, ok := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
return ok
}
func errRemovalContainer(containerID string) error {
return fmt.Errorf("Container %s is marked for removal and cannot be connected or disconnected to the network", containerID)
}