mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
596ca142e0
This changes the default ipc mode of daemon/engine to be private, meaning the containers will not have their /dev/shm bind-mounted from the host by default. The benefits of doing this are: 1. No leaked mounts. Eliminates the possibility of leaking mounts into other namespaces (and therefore unfortunate errors like "Unable to remove filesystem for <ID>: remove /var/lib/docker/containers/<ID>/shm: device or resource busy"). 2. Working checkpoint/restore. Make `docker checkpoint` not lose the contents of `/dev/shm`, but save it to the dump, and be restored back upon `docker start --checkpoint` (currently it is lost -- while CRIU handles tmpfs mounts, the "shareable" mount is seen as external to container, and thus rightfully ignored). 3. Better security. Currently any container is open to sharing its /dev/shm with any other container. Obviously, this change will break the following usage scenario: $ docker run -d --name donor busybox top $ docker run --rm -it --ipc container:donor busybox sh Error response from daemon: linux spec namespaces: can't join IPC of container <ID>: non-shareable IPC (hint: use IpcMode:shareable for the donor container) The solution, as hinted by the (amended) error message, is to explicitly enable donor sharing by using --ipc shareable: $ docker run -d --name donor --ipc shareable busybox top Compatibility notes: 1. This only applies to containers created _after_ this change. Existing containers are not affected and will work fine as their ipc mode is stored in HostConfig. 2. Old backward compatible behavior ("shareable" containers by default) can be enabled by either using `--default-ipc-mode shareable` daemon command line option, or by adding a `"default-ipc-mode": "shareable"` line in `/etc/docker/daemon.json` configuration file. 3. If an older client (API < 1.40) is used, a "shareable" container is created. A test to check that is added. Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
414 lines
13 KiB
Go
414 lines
13 KiB
Go
// +build linux freebsd
|
|
|
|
package daemon // import "github.com/docker/docker/daemon"
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/docker/docker/container"
|
|
"github.com/docker/docker/daemon/links"
|
|
"github.com/docker/docker/errdefs"
|
|
"github.com/docker/docker/pkg/idtools"
|
|
"github.com/docker/docker/pkg/mount"
|
|
"github.com/docker/docker/pkg/stringid"
|
|
"github.com/docker/docker/runconfig"
|
|
"github.com/docker/libnetwork"
|
|
"github.com/opencontainers/selinux/go-selinux/label"
|
|
"github.com/pkg/errors"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
|
|
var env []string
|
|
children := daemon.children(container)
|
|
|
|
bridgeSettings := container.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
|
if bridgeSettings == nil || bridgeSettings.EndpointSettings == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
for linkAlias, child := range children {
|
|
if !child.IsRunning() {
|
|
return nil, fmt.Errorf("Cannot link to a non running container: %s AS %s", child.Name, linkAlias)
|
|
}
|
|
|
|
childBridgeSettings := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
|
if childBridgeSettings == nil || childBridgeSettings.EndpointSettings == nil {
|
|
return nil, fmt.Errorf("container %s not attached to default bridge network", child.ID)
|
|
}
|
|
|
|
link := links.NewLink(
|
|
bridgeSettings.IPAddress,
|
|
childBridgeSettings.IPAddress,
|
|
linkAlias,
|
|
child.Config.Env,
|
|
child.Config.ExposedPorts,
|
|
)
|
|
|
|
env = append(env, link.ToEnv()...)
|
|
}
|
|
|
|
return env, nil
|
|
}
|
|
|
|
func (daemon *Daemon) getIpcContainer(id string) (*container.Container, error) {
|
|
errMsg := "can't join IPC of container " + id
|
|
// Check the container exists
|
|
container, err := daemon.GetContainer(id)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, errMsg)
|
|
}
|
|
// Check the container is running and not restarting
|
|
if err := daemon.checkContainer(container, containerIsRunning, containerIsNotRestarting); err != nil {
|
|
return nil, errors.Wrap(err, errMsg)
|
|
}
|
|
// Check the container ipc is shareable
|
|
if st, err := os.Stat(container.ShmPath); err != nil || !st.IsDir() {
|
|
if err == nil || os.IsNotExist(err) {
|
|
return nil, errors.New(errMsg + ": non-shareable IPC (hint: use IpcMode:shareable for the donor container)")
|
|
}
|
|
// stat() failed?
|
|
return nil, errors.Wrap(err, errMsg+": unexpected error from stat "+container.ShmPath)
|
|
}
|
|
|
|
return container, nil
|
|
}
|
|
|
|
func (daemon *Daemon) getPidContainer(container *container.Container) (*container.Container, error) {
|
|
containerID := container.HostConfig.PidMode.Container()
|
|
container, err := daemon.GetContainer(containerID)
|
|
if err != nil {
|
|
return nil, errors.Wrapf(err, "cannot join PID of a non running container: %s", containerID)
|
|
}
|
|
return container, daemon.checkContainer(container, containerIsRunning, containerIsNotRestarting)
|
|
}
|
|
|
|
func containerIsRunning(c *container.Container) error {
|
|
if !c.IsRunning() {
|
|
return errdefs.Conflict(errors.Errorf("container %s is not running", c.ID))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func containerIsNotRestarting(c *container.Container) error {
|
|
if c.IsRestarting() {
|
|
return errContainerIsRestarting(c.ID)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
|
|
ipcMode := c.HostConfig.IpcMode
|
|
|
|
switch {
|
|
case ipcMode.IsContainer():
|
|
ic, err := daemon.getIpcContainer(ipcMode.Container())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
c.ShmPath = ic.ShmPath
|
|
|
|
case ipcMode.IsHost():
|
|
if _, err := os.Stat("/dev/shm"); err != nil {
|
|
return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
|
|
}
|
|
c.ShmPath = "/dev/shm"
|
|
|
|
case ipcMode.IsPrivate(), ipcMode.IsNone():
|
|
// c.ShmPath will/should not be used, so make it empty.
|
|
// Container's /dev/shm mount comes from OCI spec.
|
|
c.ShmPath = ""
|
|
|
|
case ipcMode.IsEmpty():
|
|
// A container was created by an older version of the daemon.
|
|
// The default behavior used to be what is now called "shareable".
|
|
fallthrough
|
|
|
|
case ipcMode.IsShareable():
|
|
rootIDs := daemon.idMapping.RootPair()
|
|
if !c.HasMountFor("/dev/shm") {
|
|
shmPath, err := c.ShmResourcePath()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := idtools.MkdirAllAndChown(shmPath, 0700, rootIDs); err != nil {
|
|
return err
|
|
}
|
|
|
|
shmproperty := "mode=1777,size=" + strconv.FormatInt(c.HostConfig.ShmSize, 10)
|
|
if err := unix.Mount("shm", shmPath, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
|
|
return fmt.Errorf("mounting shm tmpfs: %s", err)
|
|
}
|
|
if err := os.Chown(shmPath, rootIDs.UID, rootIDs.GID); err != nil {
|
|
return err
|
|
}
|
|
c.ShmPath = shmPath
|
|
}
|
|
|
|
default:
|
|
return fmt.Errorf("invalid IPC mode: %v", ipcMode)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (daemon *Daemon) setupSecretDir(c *container.Container) (setupErr error) {
|
|
if len(c.SecretReferences) == 0 && len(c.ConfigReferences) == 0 {
|
|
return nil
|
|
}
|
|
|
|
if err := daemon.createSecretsDir(c); err != nil {
|
|
return err
|
|
}
|
|
defer func() {
|
|
if setupErr != nil {
|
|
daemon.cleanupSecretDir(c)
|
|
}
|
|
}()
|
|
|
|
if c.DependencyStore == nil {
|
|
return fmt.Errorf("secret store is not initialized")
|
|
}
|
|
|
|
// retrieve possible remapped range start for root UID, GID
|
|
rootIDs := daemon.idMapping.RootPair()
|
|
|
|
for _, s := range c.SecretReferences {
|
|
// TODO (ehazlett): use type switch when more are supported
|
|
if s.File == nil {
|
|
logrus.Error("secret target type is not a file target")
|
|
continue
|
|
}
|
|
|
|
// secrets are created in the SecretMountPath on the host, at a
|
|
// single level
|
|
fPath, err := c.SecretFilePath(*s)
|
|
if err != nil {
|
|
return errors.Wrap(err, "error getting secret file path")
|
|
}
|
|
if err := idtools.MkdirAllAndChown(filepath.Dir(fPath), 0700, rootIDs); err != nil {
|
|
return errors.Wrap(err, "error creating secret mount path")
|
|
}
|
|
|
|
logrus.WithFields(logrus.Fields{
|
|
"name": s.File.Name,
|
|
"path": fPath,
|
|
}).Debug("injecting secret")
|
|
secret, err := c.DependencyStore.Secrets().Get(s.SecretID)
|
|
if err != nil {
|
|
return errors.Wrap(err, "unable to get secret from secret store")
|
|
}
|
|
if err := ioutil.WriteFile(fPath, secret.Spec.Data, s.File.Mode); err != nil {
|
|
return errors.Wrap(err, "error injecting secret")
|
|
}
|
|
|
|
uid, err := strconv.Atoi(s.File.UID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
gid, err := strconv.Atoi(s.File.GID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := os.Chown(fPath, rootIDs.UID+uid, rootIDs.GID+gid); err != nil {
|
|
return errors.Wrap(err, "error setting ownership for secret")
|
|
}
|
|
if err := os.Chmod(fPath, s.File.Mode); err != nil {
|
|
return errors.Wrap(err, "error setting file mode for secret")
|
|
}
|
|
}
|
|
|
|
for _, ref := range c.ConfigReferences {
|
|
// TODO (ehazlett): use type switch when more are supported
|
|
if ref.File == nil {
|
|
// Runtime configs are not mounted into the container, but they're
|
|
// a valid type of config so we should not error when we encounter
|
|
// one.
|
|
if ref.Runtime == nil {
|
|
logrus.Error("config target type is not a file or runtime target")
|
|
}
|
|
// However, in any case, this isn't a file config, so we have no
|
|
// further work to do
|
|
continue
|
|
}
|
|
|
|
fPath, err := c.ConfigFilePath(*ref)
|
|
if err != nil {
|
|
return errors.Wrap(err, "error getting config file path for container")
|
|
}
|
|
if err := idtools.MkdirAllAndChown(filepath.Dir(fPath), 0700, rootIDs); err != nil {
|
|
return errors.Wrap(err, "error creating config mount path")
|
|
}
|
|
|
|
logrus.WithFields(logrus.Fields{
|
|
"name": ref.File.Name,
|
|
"path": fPath,
|
|
}).Debug("injecting config")
|
|
config, err := c.DependencyStore.Configs().Get(ref.ConfigID)
|
|
if err != nil {
|
|
return errors.Wrap(err, "unable to get config from config store")
|
|
}
|
|
if err := ioutil.WriteFile(fPath, config.Spec.Data, ref.File.Mode); err != nil {
|
|
return errors.Wrap(err, "error injecting config")
|
|
}
|
|
|
|
uid, err := strconv.Atoi(ref.File.UID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
gid, err := strconv.Atoi(ref.File.GID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := os.Chown(fPath, rootIDs.UID+uid, rootIDs.GID+gid); err != nil {
|
|
return errors.Wrap(err, "error setting ownership for config")
|
|
}
|
|
if err := os.Chmod(fPath, ref.File.Mode); err != nil {
|
|
return errors.Wrap(err, "error setting file mode for config")
|
|
}
|
|
}
|
|
|
|
return daemon.remountSecretDir(c)
|
|
}
|
|
|
|
// createSecretsDir is used to create a dir suitable for storing container secrets.
|
|
// In practice this is using a tmpfs mount and is used for both "configs" and "secrets"
|
|
func (daemon *Daemon) createSecretsDir(c *container.Container) error {
|
|
// retrieve possible remapped range start for root UID, GID
|
|
rootIDs := daemon.idMapping.RootPair()
|
|
dir, err := c.SecretMountPath()
|
|
if err != nil {
|
|
return errors.Wrap(err, "error getting container secrets dir")
|
|
}
|
|
|
|
// create tmpfs
|
|
if err := idtools.MkdirAllAndChown(dir, 0700, rootIDs); err != nil {
|
|
return errors.Wrap(err, "error creating secret local mount path")
|
|
}
|
|
|
|
tmpfsOwnership := fmt.Sprintf("uid=%d,gid=%d", rootIDs.UID, rootIDs.GID)
|
|
if err := mount.Mount("tmpfs", dir, "tmpfs", "nodev,nosuid,noexec,"+tmpfsOwnership); err != nil {
|
|
return errors.Wrap(err, "unable to setup secret mount")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (daemon *Daemon) remountSecretDir(c *container.Container) error {
|
|
dir, err := c.SecretMountPath()
|
|
if err != nil {
|
|
return errors.Wrap(err, "error getting container secrets path")
|
|
}
|
|
if err := label.Relabel(dir, c.MountLabel, false); err != nil {
|
|
logrus.WithError(err).WithField("dir", dir).Warn("Error while attempting to set selinux label")
|
|
}
|
|
rootIDs := daemon.idMapping.RootPair()
|
|
tmpfsOwnership := fmt.Sprintf("uid=%d,gid=%d", rootIDs.UID, rootIDs.GID)
|
|
|
|
// remount secrets ro
|
|
if err := mount.Mount("tmpfs", dir, "tmpfs", "remount,ro,"+tmpfsOwnership); err != nil {
|
|
return errors.Wrap(err, "unable to remount dir as readonly")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (daemon *Daemon) cleanupSecretDir(c *container.Container) {
|
|
dir, err := c.SecretMountPath()
|
|
if err != nil {
|
|
logrus.WithError(err).WithField("container", c.ID).Warn("error getting secrets mount path for container")
|
|
}
|
|
if err := mount.RecursiveUnmount(dir); err != nil {
|
|
logrus.WithField("dir", dir).WithError(err).Warn("Error while attempting to unmount dir, this may prevent removal of container.")
|
|
}
|
|
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
|
|
logrus.WithField("dir", dir).WithError(err).Error("Error removing dir.")
|
|
}
|
|
}
|
|
|
|
func killProcessDirectly(cntr *container.Container) error {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
|
|
// Block until the container to stops or timeout.
|
|
status := <-cntr.Wait(ctx, container.WaitConditionNotRunning)
|
|
if status.Err() != nil {
|
|
// Ensure that we don't kill ourselves
|
|
if pid := cntr.GetPID(); pid != 0 {
|
|
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(cntr.ID))
|
|
if err := unix.Kill(pid, 9); err != nil {
|
|
if err != unix.ESRCH {
|
|
return err
|
|
}
|
|
e := errNoSuchProcess{pid, 9}
|
|
logrus.Debug(e)
|
|
return e
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func isLinkable(child *container.Container) bool {
|
|
// A container is linkable only if it belongs to the default network
|
|
_, ok := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
|
return ok
|
|
}
|
|
|
|
// enableIPOnPredefinedNetwork always reports false in this linux/freebsd
// build.
// NOTE(review): presumably this gates whether an IP address may be requested
// on predefined networks — confirm against the callers.
func enableIPOnPredefinedNetwork() bool {
	const enabled = false
	return enabled
}
|
|
|
|
func (daemon *Daemon) isNetworkHotPluggable() bool {
|
|
return true
|
|
}
|
|
|
|
func (daemon *Daemon) setupPathsAndSandboxOptions(container *container.Container, sboxOptions *[]libnetwork.SandboxOption) error {
|
|
var err error
|
|
|
|
if container.HostConfig.NetworkMode.IsHost() {
|
|
// Point to the host files, so that will be copied into the container running in host mode
|
|
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginHostsPath("/etc/hosts"))
|
|
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginResolvConfPath("/etc/resolv.conf"))
|
|
} else {
|
|
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginResolvConfPath(daemon.configStore.GetResolvConf()))
|
|
}
|
|
|
|
container.HostsPath, err = container.GetRootResourcePath("hosts")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
*sboxOptions = append(*sboxOptions, libnetwork.OptionHostsPath(container.HostsPath))
|
|
|
|
container.ResolvConfPath, err = container.GetRootResourcePath("resolv.conf")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
*sboxOptions = append(*sboxOptions, libnetwork.OptionResolvConfPath(container.ResolvConfPath))
|
|
return nil
|
|
}
|
|
|
|
func (daemon *Daemon) initializeNetworkingPaths(container *container.Container, nc *container.Container) error {
|
|
container.HostnamePath = nc.HostnamePath
|
|
container.HostsPath = nc.HostsPath
|
|
container.ResolvConfPath = nc.ResolvConfPath
|
|
return nil
|
|
}
|
|
|
|
func (daemon *Daemon) setupContainerMountsRoot(c *container.Container) error {
|
|
// get the root mount path so we can make it unbindable
|
|
p, err := c.MountsResourcePath("")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return idtools.MkdirAllAndChown(p, 0700, daemon.idMapping.RootPair())
|
|
}
|