mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
![Vivek Goyal](/assets/img/avatar_default.png)
People have reported following problem. - docker run -ti --name=foo -v /dev/:/dev/ fedora bash - docker cp foo:/bin/bash /tmp Once the cp operation is complete, it unmounted /dev/pts on the host. /dev/pts is a submount of /dev/. This is completely unexpected. Following is the reson for this behavior. containerArchivePath() call mountVolumes() which goes through all the mounts points of a container and mounts them in daemon mount namespace in /var/lib/docker/devicemapper/mnt/<containerid>/rootfs dir. And once we have extracted the data required, these are unmounted using UnmountVolumes(). Mounts are done using recursive bind (rbind). And these are unmounted using lazy mount option on top level mount. (detachMounted()). That means if there are submounts under top level mounts, these mount events will propagate and they were "shared" mounts with host, it will unmount the submount on host as well. For example, try following. - Prepare a parent and child mount point. $ mkdir /root/foo $ mount --bind /root/foo /root/foo $ mount --make-rshared /root/foo - Prepare a child mount $ mkdir /root/foo/foo1 $ mount --bind /root/foo/foo1 /root/foo/foo1 - Bind mount foo at bar $ mkdir /root/bar $ mount --rbind /root/foo /root/bar - Now lazy unmount /root/bar and it will unmount /root/foo/foo1 as well. $ umount -l /root/bar This is not unintended. We just wanted to unmount /root/bar and anything underneath but did not have intentions of unmounting anything on source. So far this was not a problem as docker daemon was running in a seprate mount namespace where all propagation was "slave". That means any unmounts in docker daemon namespace did not propagate to host namespace. But now we are running docker daemon in host namespace so that it is possible to mount some volumes "shared" with container. So that if container mounts something it propagates to host namespace as well. Given mountVolumes() seems to be doing only temporary mounts to read some data, there does not seem to be a need to mount these shared/slave. Just mount these private so that on unmount, nothing propagates and does not have unintended consequences. Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
369 lines
11 KiB
Go
369 lines
11 KiB
Go
// +build linux freebsd
|
|
|
|
package daemon
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/Sirupsen/logrus"
|
|
"github.com/docker/docker/container"
|
|
"github.com/docker/docker/daemon/links"
|
|
"github.com/docker/docker/pkg/fileutils"
|
|
"github.com/docker/docker/pkg/idtools"
|
|
"github.com/docker/docker/pkg/mount"
|
|
"github.com/docker/docker/pkg/stringid"
|
|
"github.com/docker/docker/runconfig"
|
|
containertypes "github.com/docker/engine-api/types/container"
|
|
networktypes "github.com/docker/engine-api/types/network"
|
|
"github.com/docker/libnetwork"
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/opencontainers/runc/libcontainer/devices"
|
|
"github.com/opencontainers/runc/libcontainer/label"
|
|
"github.com/opencontainers/specs/specs-go"
|
|
)
|
|
|
|
func u32Ptr(i int64) *uint32 { u := uint32(i); return &u }
|
|
func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }
|
|
|
|
func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
|
|
var env []string
|
|
children := daemon.children(container)
|
|
|
|
bridgeSettings := container.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
|
if bridgeSettings == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
for linkAlias, child := range children {
|
|
if !child.IsRunning() {
|
|
return nil, fmt.Errorf("Cannot link to a non running container: %s AS %s", child.Name, linkAlias)
|
|
}
|
|
|
|
childBridgeSettings := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
|
if childBridgeSettings == nil {
|
|
return nil, fmt.Errorf("container %s not attached to default bridge network", child.ID)
|
|
}
|
|
|
|
link := links.NewLink(
|
|
bridgeSettings.IPAddress,
|
|
childBridgeSettings.IPAddress,
|
|
linkAlias,
|
|
child.Config.Env,
|
|
child.Config.ExposedPorts,
|
|
)
|
|
|
|
for _, envVar := range link.ToEnv() {
|
|
env = append(env, envVar)
|
|
}
|
|
}
|
|
|
|
return env, nil
|
|
}
|
|
|
|
// getSize returns the real size & virtual size of the container.
|
|
func (daemon *Daemon) getSize(container *container.Container) (int64, int64) {
|
|
var (
|
|
sizeRw, sizeRootfs int64
|
|
err error
|
|
)
|
|
|
|
if err := daemon.Mount(container); err != nil {
|
|
logrus.Errorf("Failed to compute size of container rootfs %s: %s", container.ID, err)
|
|
return sizeRw, sizeRootfs
|
|
}
|
|
defer daemon.Unmount(container)
|
|
|
|
sizeRw, err = container.RWLayer.Size()
|
|
if err != nil {
|
|
logrus.Errorf("Driver %s couldn't return diff size of container %s: %s",
|
|
daemon.GraphDriverName(), container.ID, err)
|
|
// FIXME: GetSize should return an error. Not changing it now in case
|
|
// there is a side-effect.
|
|
sizeRw = -1
|
|
}
|
|
|
|
if parent := container.RWLayer.Parent(); parent != nil {
|
|
sizeRootfs, err = parent.Size()
|
|
if err != nil {
|
|
sizeRootfs = -1
|
|
} else if sizeRw != -1 {
|
|
sizeRootfs += sizeRw
|
|
}
|
|
}
|
|
return sizeRw, sizeRootfs
|
|
}
|
|
|
|
// ConnectToNetwork connects a container to a network
|
|
func (daemon *Daemon) ConnectToNetwork(container *container.Container, idOrName string, endpointConfig *networktypes.EndpointSettings) error {
|
|
if !container.Running {
|
|
if container.RemovalInProgress || container.Dead {
|
|
return errRemovalContainer(container.ID)
|
|
}
|
|
if _, err := daemon.updateNetworkConfig(container, idOrName, endpointConfig, true); err != nil {
|
|
return err
|
|
}
|
|
if endpointConfig != nil {
|
|
container.NetworkSettings.Networks[idOrName] = endpointConfig
|
|
}
|
|
} else {
|
|
if err := daemon.connectToNetwork(container, idOrName, endpointConfig, true); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := container.ToDiskLocking(); err != nil {
|
|
return fmt.Errorf("Error saving container to disk: %v", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// DisconnectFromNetwork disconnects container from network n.
|
|
func (daemon *Daemon) DisconnectFromNetwork(container *container.Container, n libnetwork.Network, force bool) error {
|
|
if container.HostConfig.NetworkMode.IsHost() && containertypes.NetworkMode(n.Type()).IsHost() {
|
|
return runconfig.ErrConflictHostNetwork
|
|
}
|
|
if !container.Running {
|
|
if container.RemovalInProgress || container.Dead {
|
|
return errRemovalContainer(container.ID)
|
|
}
|
|
if _, ok := container.NetworkSettings.Networks[n.Name()]; ok {
|
|
delete(container.NetworkSettings.Networks, n.Name())
|
|
} else {
|
|
return fmt.Errorf("container %s is not connected to the network %s", container.ID, n.Name())
|
|
}
|
|
} else {
|
|
if err := disconnectFromNetwork(container, n, false); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if err := container.ToDiskLocking(); err != nil {
|
|
return fmt.Errorf("Error saving container to disk: %v", err)
|
|
}
|
|
|
|
attributes := map[string]string{
|
|
"container": container.ID,
|
|
}
|
|
daemon.LogNetworkEventWithAttributes(n, "disconnect", attributes)
|
|
return nil
|
|
}
|
|
|
|
func (daemon *Daemon) getIpcContainer(container *container.Container) (*container.Container, error) {
|
|
containerID := container.HostConfig.IpcMode.Container()
|
|
c, err := daemon.GetContainer(containerID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !c.IsRunning() {
|
|
return nil, fmt.Errorf("cannot join IPC of a non running container: %s", containerID)
|
|
}
|
|
if c.IsRestarting() {
|
|
return nil, errContainerIsRestarting(container.ID)
|
|
}
|
|
return c, nil
|
|
}
|
|
|
|
func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
|
|
var err error
|
|
|
|
c.ShmPath, err = c.ShmResourcePath()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if c.HostConfig.IpcMode.IsContainer() {
|
|
ic, err := daemon.getIpcContainer(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
c.ShmPath = ic.ShmPath
|
|
} else if c.HostConfig.IpcMode.IsHost() {
|
|
if _, err := os.Stat("/dev/shm"); err != nil {
|
|
return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
|
|
}
|
|
c.ShmPath = "/dev/shm"
|
|
} else {
|
|
rootUID, rootGID := daemon.GetRemappedUIDGID()
|
|
if !c.HasMountFor("/dev/shm") {
|
|
shmPath, err := c.ShmResourcePath()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
|
|
return err
|
|
}
|
|
|
|
shmSize := container.DefaultSHMSize
|
|
if c.HostConfig.ShmSize != 0 {
|
|
shmSize = c.HostConfig.ShmSize
|
|
}
|
|
shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10)
|
|
if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
|
|
return fmt.Errorf("mounting shm tmpfs: %s", err)
|
|
}
|
|
if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (daemon *Daemon) mountVolumes(container *container.Container) error {
|
|
mounts, err := daemon.setupMounts(container)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, m := range mounts {
|
|
dest, err := container.GetResourcePath(m.Destination)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var stat os.FileInfo
|
|
stat, err = os.Stat(m.Source)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err = fileutils.CreateIfNotExists(dest, stat.IsDir()); err != nil {
|
|
return err
|
|
}
|
|
|
|
opts := "rbind,ro"
|
|
if m.Writable {
|
|
opts = "rbind,rw"
|
|
}
|
|
|
|
if err := mount.Mount(m.Source, dest, "bind", opts); err != nil {
|
|
return err
|
|
}
|
|
|
|
// mountVolumes() seems to be called for temporary mounts
|
|
// outside the container. Soon these will be unmounted with
|
|
// lazy unmount option and given we have mounted the rbind,
|
|
// all the submounts will propagate if these are shared. If
|
|
// daemon is running in host namespace and has / as shared
|
|
// then these unmounts will propagate and unmount original
|
|
// mount as well. So make all these mounts rprivate.
|
|
// Do not use propagation property of volume as that should
|
|
// apply only when mounting happen inside the container.
|
|
if err := mount.MakeRPrivate(dest); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func killProcessDirectly(container *container.Container) error {
|
|
if _, err := container.WaitStop(10 * time.Second); err != nil {
|
|
// Ensure that we don't kill ourselves
|
|
if pid := container.GetPID(); pid != 0 {
|
|
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(container.ID))
|
|
if err := syscall.Kill(pid, 9); err != nil {
|
|
if err != syscall.ESRCH {
|
|
return err
|
|
}
|
|
e := errNoSuchProcess{pid, 9}
|
|
logrus.Debug(e)
|
|
return e
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func specDevice(d *configs.Device) specs.Device {
|
|
return specs.Device{
|
|
Type: string(d.Type),
|
|
Path: d.Path,
|
|
Major: d.Major,
|
|
Minor: d.Minor,
|
|
FileMode: fmPtr(int64(d.FileMode)),
|
|
UID: u32Ptr(int64(d.Uid)),
|
|
GID: u32Ptr(int64(d.Gid)),
|
|
}
|
|
}
|
|
|
|
func specDeviceCgroup(d *configs.Device) specs.DeviceCgroup {
|
|
t := string(d.Type)
|
|
return specs.DeviceCgroup{
|
|
Allow: true,
|
|
Type: &t,
|
|
Major: &d.Major,
|
|
Minor: &d.Minor,
|
|
Access: &d.Permissions,
|
|
}
|
|
}
|
|
|
|
func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []specs.Device, devPermissions []specs.DeviceCgroup, err error) {
|
|
resolvedPathOnHost := deviceMapping.PathOnHost
|
|
|
|
// check if it is a symbolic link
|
|
if src, e := os.Lstat(deviceMapping.PathOnHost); e == nil && src.Mode()&os.ModeSymlink == os.ModeSymlink {
|
|
if linkedPathOnHost, e := os.Readlink(deviceMapping.PathOnHost); e == nil {
|
|
resolvedPathOnHost = linkedPathOnHost
|
|
}
|
|
}
|
|
|
|
device, err := devices.DeviceFromPath(resolvedPathOnHost, deviceMapping.CgroupPermissions)
|
|
// if there was no error, return the device
|
|
if err == nil {
|
|
device.Path = deviceMapping.PathInContainer
|
|
return append(devs, specDevice(device)), append(devPermissions, specDeviceCgroup(device)), nil
|
|
}
|
|
|
|
// if the device is not a device node
|
|
// try to see if it's a directory holding many devices
|
|
if err == devices.ErrNotADevice {
|
|
|
|
// check if it is a directory
|
|
if src, e := os.Stat(resolvedPathOnHost); e == nil && src.IsDir() {
|
|
|
|
// mount the internal devices recursively
|
|
filepath.Walk(resolvedPathOnHost, func(dpath string, f os.FileInfo, e error) error {
|
|
childDevice, e := devices.DeviceFromPath(dpath, deviceMapping.CgroupPermissions)
|
|
if e != nil {
|
|
// ignore the device
|
|
return nil
|
|
}
|
|
|
|
// add the device to userSpecified devices
|
|
childDevice.Path = strings.Replace(dpath, resolvedPathOnHost, deviceMapping.PathInContainer, 1)
|
|
devs = append(devs, specDevice(childDevice))
|
|
devPermissions = append(devPermissions, specDeviceCgroup(childDevice))
|
|
|
|
return nil
|
|
})
|
|
}
|
|
}
|
|
|
|
if len(devs) > 0 {
|
|
return devs, devPermissions, nil
|
|
}
|
|
|
|
return devs, devPermissions, fmt.Errorf("error gathering device information while adding custom device %q: %s", deviceMapping.PathOnHost, err)
|
|
}
|
|
|
|
func detachMounted(path string) error {
|
|
return syscall.Unmount(path, syscall.MNT_DETACH)
|
|
}
|
|
|
|
func isLinkable(child *container.Container) bool {
|
|
// A container is linkable only if it belongs to the default network
|
|
_, ok := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
|
return ok
|
|
}
|
|
|
|
func errRemovalContainer(containerID string) error {
|
|
return fmt.Errorf("Container %s is marked for removal and cannot be connected or disconnected to the network", containerID)
|
|
}
|