1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00
moby--moby/daemon/container_operations_unix.go
Kir Kolyshkin 596ca142e0 daemon: use 'private' ipc mode by default
This changes the default ipc mode of daemon/engine to be private,
meaning the containers will not have their /dev/shm bind-mounted
from the host by default. The benefits of doing this are:

 1. No leaked mounts. Eliminate a possibility to leak mounts into
    other namespaces (and therefore unfortunate errors like "Unable to
    remove filesystem for <ID>: remove /var/lib/docker/containers/<ID>/shm:
    device or resource busy").

 2. Working checkpoint/restore. Make `docker checkpoint`
    not lose the contents of `/dev/shm`, but save it to
    the dump, and be restored back upon `docker start --checkpoint`
    (currently it is lost -- while CRIU handles tmpfs mounts,
    the "shareable" mount is seen as external to container,
    and thus rightfully ignored).

3. Better security. Currently any container is opened to share
   its /dev/shm with any other container.

Obviously, this change will break the following usage scenario:

 $ docker run -d --name donor busybox top
 $ docker run --rm -it --ipc container:donor busybox sh
 Error response from daemon: linux spec namespaces: can't join IPC
 of container <ID>: non-shareable IPC (hint: use IpcMode:shareable
 for the donor container)

The soution, as hinted by the (amended) error message, is to
explicitly enable donor sharing by using --ipc shareable:

 $ docker run -d --name donor --ipc shareable busybox top

Compatibility notes:

1. This only applies to containers created _after_ this change.
   Existing containers are not affected and will work fine
   as their ipc mode is stored in HostConfig.

2. Old backward compatible behavior ("shareable" containers
   by default) can be enabled by either using
   `--default-ipc-mode shareable` daemon command line option,
   or by adding a `"default-ipc-mode": "shareable"`
   line in `/etc/docker/daemon.json` configuration file.

3. If an older client (API < 1.40) is used, a "shareable" container
   is created. A test to check that is added.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2019-03-09 18:57:42 -08:00

414 lines
13 KiB
Go

// +build linux freebsd
package daemon // import "github.com/docker/docker/daemon"
import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"time"
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/links"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/stringid"
"github.com/docker/docker/runconfig"
"github.com/docker/libnetwork"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
var env []string
children := daemon.children(container)
bridgeSettings := container.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
if bridgeSettings == nil || bridgeSettings.EndpointSettings == nil {
return nil, nil
}
for linkAlias, child := range children {
if !child.IsRunning() {
return nil, fmt.Errorf("Cannot link to a non running container: %s AS %s", child.Name, linkAlias)
}
childBridgeSettings := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
if childBridgeSettings == nil || childBridgeSettings.EndpointSettings == nil {
return nil, fmt.Errorf("container %s not attached to default bridge network", child.ID)
}
link := links.NewLink(
bridgeSettings.IPAddress,
childBridgeSettings.IPAddress,
linkAlias,
child.Config.Env,
child.Config.ExposedPorts,
)
env = append(env, link.ToEnv()...)
}
return env, nil
}
func (daemon *Daemon) getIpcContainer(id string) (*container.Container, error) {
errMsg := "can't join IPC of container " + id
// Check the container exists
container, err := daemon.GetContainer(id)
if err != nil {
return nil, errors.Wrap(err, errMsg)
}
// Check the container is running and not restarting
if err := daemon.checkContainer(container, containerIsRunning, containerIsNotRestarting); err != nil {
return nil, errors.Wrap(err, errMsg)
}
// Check the container ipc is shareable
if st, err := os.Stat(container.ShmPath); err != nil || !st.IsDir() {
if err == nil || os.IsNotExist(err) {
return nil, errors.New(errMsg + ": non-shareable IPC (hint: use IpcMode:shareable for the donor container)")
}
// stat() failed?
return nil, errors.Wrap(err, errMsg+": unexpected error from stat "+container.ShmPath)
}
return container, nil
}
func (daemon *Daemon) getPidContainer(container *container.Container) (*container.Container, error) {
containerID := container.HostConfig.PidMode.Container()
container, err := daemon.GetContainer(containerID)
if err != nil {
return nil, errors.Wrapf(err, "cannot join PID of a non running container: %s", containerID)
}
return container, daemon.checkContainer(container, containerIsRunning, containerIsNotRestarting)
}
func containerIsRunning(c *container.Container) error {
if !c.IsRunning() {
return errdefs.Conflict(errors.Errorf("container %s is not running", c.ID))
}
return nil
}
func containerIsNotRestarting(c *container.Container) error {
if c.IsRestarting() {
return errContainerIsRestarting(c.ID)
}
return nil
}
func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
ipcMode := c.HostConfig.IpcMode
switch {
case ipcMode.IsContainer():
ic, err := daemon.getIpcContainer(ipcMode.Container())
if err != nil {
return err
}
c.ShmPath = ic.ShmPath
case ipcMode.IsHost():
if _, err := os.Stat("/dev/shm"); err != nil {
return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
}
c.ShmPath = "/dev/shm"
case ipcMode.IsPrivate(), ipcMode.IsNone():
// c.ShmPath will/should not be used, so make it empty.
// Container's /dev/shm mount comes from OCI spec.
c.ShmPath = ""
case ipcMode.IsEmpty():
// A container was created by an older version of the daemon.
// The default behavior used to be what is now called "shareable".
fallthrough
case ipcMode.IsShareable():
rootIDs := daemon.idMapping.RootPair()
if !c.HasMountFor("/dev/shm") {
shmPath, err := c.ShmResourcePath()
if err != nil {
return err
}
if err := idtools.MkdirAllAndChown(shmPath, 0700, rootIDs); err != nil {
return err
}
shmproperty := "mode=1777,size=" + strconv.FormatInt(c.HostConfig.ShmSize, 10)
if err := unix.Mount("shm", shmPath, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
return fmt.Errorf("mounting shm tmpfs: %s", err)
}
if err := os.Chown(shmPath, rootIDs.UID, rootIDs.GID); err != nil {
return err
}
c.ShmPath = shmPath
}
default:
return fmt.Errorf("invalid IPC mode: %v", ipcMode)
}
return nil
}
func (daemon *Daemon) setupSecretDir(c *container.Container) (setupErr error) {
if len(c.SecretReferences) == 0 && len(c.ConfigReferences) == 0 {
return nil
}
if err := daemon.createSecretsDir(c); err != nil {
return err
}
defer func() {
if setupErr != nil {
daemon.cleanupSecretDir(c)
}
}()
if c.DependencyStore == nil {
return fmt.Errorf("secret store is not initialized")
}
// retrieve possible remapped range start for root UID, GID
rootIDs := daemon.idMapping.RootPair()
for _, s := range c.SecretReferences {
// TODO (ehazlett): use type switch when more are supported
if s.File == nil {
logrus.Error("secret target type is not a file target")
continue
}
// secrets are created in the SecretMountPath on the host, at a
// single level
fPath, err := c.SecretFilePath(*s)
if err != nil {
return errors.Wrap(err, "error getting secret file path")
}
if err := idtools.MkdirAllAndChown(filepath.Dir(fPath), 0700, rootIDs); err != nil {
return errors.Wrap(err, "error creating secret mount path")
}
logrus.WithFields(logrus.Fields{
"name": s.File.Name,
"path": fPath,
}).Debug("injecting secret")
secret, err := c.DependencyStore.Secrets().Get(s.SecretID)
if err != nil {
return errors.Wrap(err, "unable to get secret from secret store")
}
if err := ioutil.WriteFile(fPath, secret.Spec.Data, s.File.Mode); err != nil {
return errors.Wrap(err, "error injecting secret")
}
uid, err := strconv.Atoi(s.File.UID)
if err != nil {
return err
}
gid, err := strconv.Atoi(s.File.GID)
if err != nil {
return err
}
if err := os.Chown(fPath, rootIDs.UID+uid, rootIDs.GID+gid); err != nil {
return errors.Wrap(err, "error setting ownership for secret")
}
if err := os.Chmod(fPath, s.File.Mode); err != nil {
return errors.Wrap(err, "error setting file mode for secret")
}
}
for _, ref := range c.ConfigReferences {
// TODO (ehazlett): use type switch when more are supported
if ref.File == nil {
// Runtime configs are not mounted into the container, but they're
// a valid type of config so we should not error when we encounter
// one.
if ref.Runtime == nil {
logrus.Error("config target type is not a file or runtime target")
}
// However, in any case, this isn't a file config, so we have no
// further work to do
continue
}
fPath, err := c.ConfigFilePath(*ref)
if err != nil {
return errors.Wrap(err, "error getting config file path for container")
}
if err := idtools.MkdirAllAndChown(filepath.Dir(fPath), 0700, rootIDs); err != nil {
return errors.Wrap(err, "error creating config mount path")
}
logrus.WithFields(logrus.Fields{
"name": ref.File.Name,
"path": fPath,
}).Debug("injecting config")
config, err := c.DependencyStore.Configs().Get(ref.ConfigID)
if err != nil {
return errors.Wrap(err, "unable to get config from config store")
}
if err := ioutil.WriteFile(fPath, config.Spec.Data, ref.File.Mode); err != nil {
return errors.Wrap(err, "error injecting config")
}
uid, err := strconv.Atoi(ref.File.UID)
if err != nil {
return err
}
gid, err := strconv.Atoi(ref.File.GID)
if err != nil {
return err
}
if err := os.Chown(fPath, rootIDs.UID+uid, rootIDs.GID+gid); err != nil {
return errors.Wrap(err, "error setting ownership for config")
}
if err := os.Chmod(fPath, ref.File.Mode); err != nil {
return errors.Wrap(err, "error setting file mode for config")
}
}
return daemon.remountSecretDir(c)
}
// createSecretsDir is used to create a dir suitable for storing container secrets.
// In practice this is using a tmpfs mount and is used for both "configs" and "secrets"
func (daemon *Daemon) createSecretsDir(c *container.Container) error {
// retrieve possible remapped range start for root UID, GID
rootIDs := daemon.idMapping.RootPair()
dir, err := c.SecretMountPath()
if err != nil {
return errors.Wrap(err, "error getting container secrets dir")
}
// create tmpfs
if err := idtools.MkdirAllAndChown(dir, 0700, rootIDs); err != nil {
return errors.Wrap(err, "error creating secret local mount path")
}
tmpfsOwnership := fmt.Sprintf("uid=%d,gid=%d", rootIDs.UID, rootIDs.GID)
if err := mount.Mount("tmpfs", dir, "tmpfs", "nodev,nosuid,noexec,"+tmpfsOwnership); err != nil {
return errors.Wrap(err, "unable to setup secret mount")
}
return nil
}
func (daemon *Daemon) remountSecretDir(c *container.Container) error {
dir, err := c.SecretMountPath()
if err != nil {
return errors.Wrap(err, "error getting container secrets path")
}
if err := label.Relabel(dir, c.MountLabel, false); err != nil {
logrus.WithError(err).WithField("dir", dir).Warn("Error while attempting to set selinux label")
}
rootIDs := daemon.idMapping.RootPair()
tmpfsOwnership := fmt.Sprintf("uid=%d,gid=%d", rootIDs.UID, rootIDs.GID)
// remount secrets ro
if err := mount.Mount("tmpfs", dir, "tmpfs", "remount,ro,"+tmpfsOwnership); err != nil {
return errors.Wrap(err, "unable to remount dir as readonly")
}
return nil
}
func (daemon *Daemon) cleanupSecretDir(c *container.Container) {
dir, err := c.SecretMountPath()
if err != nil {
logrus.WithError(err).WithField("container", c.ID).Warn("error getting secrets mount path for container")
}
if err := mount.RecursiveUnmount(dir); err != nil {
logrus.WithField("dir", dir).WithError(err).Warn("Error while attempting to unmount dir, this may prevent removal of container.")
}
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
logrus.WithField("dir", dir).WithError(err).Error("Error removing dir.")
}
}
func killProcessDirectly(cntr *container.Container) error {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// Block until the container to stops or timeout.
status := <-cntr.Wait(ctx, container.WaitConditionNotRunning)
if status.Err() != nil {
// Ensure that we don't kill ourselves
if pid := cntr.GetPID(); pid != 0 {
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(cntr.ID))
if err := unix.Kill(pid, 9); err != nil {
if err != unix.ESRCH {
return err
}
e := errNoSuchProcess{pid, 9}
logrus.Debug(e)
return e
}
}
}
return nil
}
func isLinkable(child *container.Container) bool {
// A container is linkable only if it belongs to the default network
_, ok := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
return ok
}
func enableIPOnPredefinedNetwork() bool {
return false
}
func (daemon *Daemon) isNetworkHotPluggable() bool {
return true
}
func (daemon *Daemon) setupPathsAndSandboxOptions(container *container.Container, sboxOptions *[]libnetwork.SandboxOption) error {
var err error
if container.HostConfig.NetworkMode.IsHost() {
// Point to the host files, so that will be copied into the container running in host mode
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginHostsPath("/etc/hosts"))
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginResolvConfPath("/etc/resolv.conf"))
} else {
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginResolvConfPath(daemon.configStore.GetResolvConf()))
}
container.HostsPath, err = container.GetRootResourcePath("hosts")
if err != nil {
return err
}
*sboxOptions = append(*sboxOptions, libnetwork.OptionHostsPath(container.HostsPath))
container.ResolvConfPath, err = container.GetRootResourcePath("resolv.conf")
if err != nil {
return err
}
*sboxOptions = append(*sboxOptions, libnetwork.OptionResolvConfPath(container.ResolvConfPath))
return nil
}
func (daemon *Daemon) initializeNetworkingPaths(container *container.Container, nc *container.Container) error {
container.HostnamePath = nc.HostnamePath
container.HostsPath = nc.HostsPath
container.ResolvConfPath = nc.ResolvConfPath
return nil
}
func (daemon *Daemon) setupContainerMountsRoot(c *container.Container) error {
// get the root mount path so we can make it unbindable
p, err := c.MountsResourcePath("")
if err != nil {
return err
}
return idtools.MkdirAllAndChown(p, 0700, daemon.idMapping.RootPair())
}