1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00
moby--moby/daemon/container_operations_unix.go
Sebastiaan van Stijn a5324d6950
Better selection of DNS server
Commit e353e7e3f0 updated selection of the
`resolv.conf` file to use in situations where systemd-resolvd is used as
a resolver.

If a host uses `systemd-resolvd`, the system's `/etc/resolv.conf` file is
updated to set `127.0.0.53` as DNS, which is the local IP address for
systemd-resolvd. The DNS servers that are configured by the user will now
be stored in `/run/systemd/resolve/resolv.conf`, and systemd-resolvd acts
as a forwarding DNS for those.

Originally, Docker copied the DNS servers as configured in `/etc/resolv.conf`
as default DNS servers in containers, which failed to work if systemd-resolvd
is used (as `127.0.0.53` is not available inside the container's networking
namespace). To resolve this, e353e7e3f0 instead
detected if systemd-resolvd is in use, and in that case copied the "upstream"
DNS servers from the `/run/systemd/resolve/resolv.conf` configuration.

While this worked for most situations, it had some downsides, among which:

- we're skipping systemd-resolvd altogether, which means that we cannot take
  advantage of addition functionality provided by it (such as per-interface
  DNS servers)
- when updating DNS servers in the system's configuration, those changes were
  not reflected in the container configuration, which could be problematic in
  "developer" scenarios, when switching between networks.

This patch changes the way we select which resolv.conf to use as template
for the container's resolv.conf;

- in situations where a custom network is attached to the container, and the
  embedded DNS is available, we use `/etc/resolv.conf` unconditionally. If
  systemd-resolvd is used, the embedded DNS forwards external DNS lookups to
  systemd-resolvd, which in turn is responsible for forwarding requests to
  the external DNS servers configured by the user.
- if the container is running in "host mode" networking, we also use the
  DNS server that's configured in `/etc/resolv.conf`. In this situation, no
  embedded DNS server is available, but the container runs in the host's
  networking namespace, and can use the same DNS servers as the host (which
  could be systemd-resolvd or DNSMasq
- if the container uses the default (bridge) network, no embedded DNS is
  available, and the container has its own networking namespace. In this
  situation we check if systemd-resolvd is used, in which case we skip
  systemd-resolvd, and configure the upstream DNS servers as DNS for the
  container. This situation is the same as is used currently, which means
  that dynamically switching DNS servers won't be supported for these
  containers.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2020-05-25 18:20:56 +02:00

482 lines
16 KiB
Go

// +build linux freebsd
package daemon // import "github.com/docker/docker/daemon"
import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"time"
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/links"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/stringid"
"github.com/docker/docker/pkg/system"
"github.com/docker/docker/runconfig"
"github.com/docker/libnetwork"
"github.com/moby/sys/mount"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
var env []string
children := daemon.children(container)
bridgeSettings := container.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
if bridgeSettings == nil || bridgeSettings.EndpointSettings == nil {
return nil, nil
}
for linkAlias, child := range children {
if !child.IsRunning() {
return nil, fmt.Errorf("Cannot link to a non running container: %s AS %s", child.Name, linkAlias)
}
childBridgeSettings := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
if childBridgeSettings == nil || childBridgeSettings.EndpointSettings == nil {
return nil, fmt.Errorf("container %s not attached to default bridge network", child.ID)
}
link := links.NewLink(
bridgeSettings.IPAddress,
childBridgeSettings.IPAddress,
linkAlias,
child.Config.Env,
child.Config.ExposedPorts,
)
env = append(env, link.ToEnv()...)
}
return env, nil
}
func (daemon *Daemon) getIpcContainer(id string) (*container.Container, error) {
errMsg := "can't join IPC of container " + id
// Check the container exists
ctr, err := daemon.GetContainer(id)
if err != nil {
return nil, errors.Wrap(err, errMsg)
}
// Check the container is running and not restarting
if err := daemon.checkContainer(ctr, containerIsRunning, containerIsNotRestarting); err != nil {
return nil, errors.Wrap(err, errMsg)
}
// Check the container ipc is shareable
if st, err := os.Stat(ctr.ShmPath); err != nil || !st.IsDir() {
if err == nil || os.IsNotExist(err) {
return nil, errors.New(errMsg + ": non-shareable IPC (hint: use IpcMode:shareable for the donor container)")
}
// stat() failed?
return nil, errors.Wrap(err, errMsg+": unexpected error from stat "+ctr.ShmPath)
}
return ctr, nil
}
func (daemon *Daemon) getPidContainer(ctr *container.Container) (*container.Container, error) {
containerID := ctr.HostConfig.PidMode.Container()
ctr, err := daemon.GetContainer(containerID)
if err != nil {
return nil, errors.Wrapf(err, "cannot join PID of a non running container: %s", containerID)
}
return ctr, daemon.checkContainer(ctr, containerIsRunning, containerIsNotRestarting)
}
func containerIsRunning(c *container.Container) error {
if !c.IsRunning() {
return errdefs.Conflict(errors.Errorf("container %s is not running", c.ID))
}
return nil
}
func containerIsNotRestarting(c *container.Container) error {
if c.IsRestarting() {
return errContainerIsRestarting(c.ID)
}
return nil
}
func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
ipcMode := c.HostConfig.IpcMode
switch {
case ipcMode.IsContainer():
ic, err := daemon.getIpcContainer(ipcMode.Container())
if err != nil {
return err
}
c.ShmPath = ic.ShmPath
case ipcMode.IsHost():
if _, err := os.Stat("/dev/shm"); err != nil {
return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
}
c.ShmPath = "/dev/shm"
case ipcMode.IsPrivate(), ipcMode.IsNone():
// c.ShmPath will/should not be used, so make it empty.
// Container's /dev/shm mount comes from OCI spec.
c.ShmPath = ""
case ipcMode.IsEmpty():
// A container was created by an older version of the daemon.
// The default behavior used to be what is now called "shareable".
fallthrough
case ipcMode.IsShareable():
rootIDs := daemon.idMapping.RootPair()
if !c.HasMountFor("/dev/shm") {
shmPath, err := c.ShmResourcePath()
if err != nil {
return err
}
if err := idtools.MkdirAllAndChown(shmPath, 0700, rootIDs); err != nil {
return err
}
shmproperty := "mode=1777,size=" + strconv.FormatInt(c.HostConfig.ShmSize, 10)
if err := unix.Mount("shm", shmPath, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
return fmt.Errorf("mounting shm tmpfs: %s", err)
}
if err := os.Chown(shmPath, rootIDs.UID, rootIDs.GID); err != nil {
return err
}
c.ShmPath = shmPath
}
default:
return fmt.Errorf("invalid IPC mode: %v", ipcMode)
}
return nil
}
func (daemon *Daemon) setupSecretDir(c *container.Container) (setupErr error) {
if len(c.SecretReferences) == 0 && len(c.ConfigReferences) == 0 {
return nil
}
if err := daemon.createSecretsDir(c); err != nil {
return err
}
defer func() {
if setupErr != nil {
daemon.cleanupSecretDir(c)
}
}()
if c.DependencyStore == nil {
return fmt.Errorf("secret store is not initialized")
}
// retrieve possible remapped range start for root UID, GID
rootIDs := daemon.idMapping.RootPair()
for _, s := range c.SecretReferences {
// TODO (ehazlett): use type switch when more are supported
if s.File == nil {
logrus.Error("secret target type is not a file target")
continue
}
// secrets are created in the SecretMountPath on the host, at a
// single level
fPath, err := c.SecretFilePath(*s)
if err != nil {
return errors.Wrap(err, "error getting secret file path")
}
if err := idtools.MkdirAllAndChown(filepath.Dir(fPath), 0700, rootIDs); err != nil {
return errors.Wrap(err, "error creating secret mount path")
}
logrus.WithFields(logrus.Fields{
"name": s.File.Name,
"path": fPath,
}).Debug("injecting secret")
secret, err := c.DependencyStore.Secrets().Get(s.SecretID)
if err != nil {
return errors.Wrap(err, "unable to get secret from secret store")
}
if err := ioutil.WriteFile(fPath, secret.Spec.Data, s.File.Mode); err != nil {
return errors.Wrap(err, "error injecting secret")
}
uid, err := strconv.Atoi(s.File.UID)
if err != nil {
return err
}
gid, err := strconv.Atoi(s.File.GID)
if err != nil {
return err
}
if err := os.Chown(fPath, rootIDs.UID+uid, rootIDs.GID+gid); err != nil {
return errors.Wrap(err, "error setting ownership for secret")
}
if err := os.Chmod(fPath, s.File.Mode); err != nil {
return errors.Wrap(err, "error setting file mode for secret")
}
}
for _, configRef := range c.ConfigReferences {
// TODO (ehazlett): use type switch when more are supported
if configRef.File == nil {
// Runtime configs are not mounted into the container, but they're
// a valid type of config so we should not error when we encounter
// one.
if configRef.Runtime == nil {
logrus.Error("config target type is not a file or runtime target")
}
// However, in any case, this isn't a file config, so we have no
// further work to do
continue
}
fPath, err := c.ConfigFilePath(*configRef)
if err != nil {
return errors.Wrap(err, "error getting config file path for container")
}
if err := idtools.MkdirAllAndChown(filepath.Dir(fPath), 0700, rootIDs); err != nil {
return errors.Wrap(err, "error creating config mount path")
}
logrus.WithFields(logrus.Fields{
"name": configRef.File.Name,
"path": fPath,
}).Debug("injecting config")
config, err := c.DependencyStore.Configs().Get(configRef.ConfigID)
if err != nil {
return errors.Wrap(err, "unable to get config from config store")
}
if err := ioutil.WriteFile(fPath, config.Spec.Data, configRef.File.Mode); err != nil {
return errors.Wrap(err, "error injecting config")
}
uid, err := strconv.Atoi(configRef.File.UID)
if err != nil {
return err
}
gid, err := strconv.Atoi(configRef.File.GID)
if err != nil {
return err
}
if err := os.Chown(fPath, rootIDs.UID+uid, rootIDs.GID+gid); err != nil {
return errors.Wrap(err, "error setting ownership for config")
}
if err := os.Chmod(fPath, configRef.File.Mode); err != nil {
return errors.Wrap(err, "error setting file mode for config")
}
}
return daemon.remountSecretDir(c)
}
// createSecretsDir is used to create a dir suitable for storing container secrets.
// In practice this is using a tmpfs mount and is used for both "configs" and "secrets"
func (daemon *Daemon) createSecretsDir(c *container.Container) error {
// retrieve possible remapped range start for root UID, GID
rootIDs := daemon.idMapping.RootPair()
dir, err := c.SecretMountPath()
if err != nil {
return errors.Wrap(err, "error getting container secrets dir")
}
// create tmpfs
if err := idtools.MkdirAllAndChown(dir, 0700, rootIDs); err != nil {
return errors.Wrap(err, "error creating secret local mount path")
}
tmpfsOwnership := fmt.Sprintf("uid=%d,gid=%d", rootIDs.UID, rootIDs.GID)
if err := mount.Mount("tmpfs", dir, "tmpfs", "nodev,nosuid,noexec,"+tmpfsOwnership); err != nil {
return errors.Wrap(err, "unable to setup secret mount")
}
return nil
}
func (daemon *Daemon) remountSecretDir(c *container.Container) error {
dir, err := c.SecretMountPath()
if err != nil {
return errors.Wrap(err, "error getting container secrets path")
}
if err := label.Relabel(dir, c.MountLabel, false); err != nil {
logrus.WithError(err).WithField("dir", dir).Warn("Error while attempting to set selinux label")
}
rootIDs := daemon.idMapping.RootPair()
tmpfsOwnership := fmt.Sprintf("uid=%d,gid=%d", rootIDs.UID, rootIDs.GID)
// remount secrets ro
if err := mount.Mount("tmpfs", dir, "tmpfs", "remount,ro,"+tmpfsOwnership); err != nil {
return errors.Wrap(err, "unable to remount dir as readonly")
}
return nil
}
func (daemon *Daemon) cleanupSecretDir(c *container.Container) {
dir, err := c.SecretMountPath()
if err != nil {
logrus.WithError(err).WithField("container", c.ID).Warn("error getting secrets mount path for container")
}
if err := mount.RecursiveUnmount(dir); err != nil {
logrus.WithField("dir", dir).WithError(err).Warn("Error while attempting to unmount dir, this may prevent removal of container.")
}
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
logrus.WithField("dir", dir).WithError(err).Error("Error removing dir.")
}
}
func killProcessDirectly(cntr *container.Container) error {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// Block until the container to stops or timeout.
status := <-cntr.Wait(ctx, container.WaitConditionNotRunning)
if status.Err() != nil {
// Ensure that we don't kill ourselves
if pid := cntr.GetPID(); pid != 0 {
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(cntr.ID))
if err := unix.Kill(pid, 9); err != nil {
if err != unix.ESRCH {
return err
}
e := errNoSuchProcess{pid, 9}
logrus.Debug(e)
return e
}
// In case there were some exceptions(e.g., state of zombie and D)
if system.IsProcessAlive(pid) {
// Since we can not kill a zombie pid, add zombie check here
isZombie, err := system.IsProcessZombie(pid)
if err != nil {
logrus.Warnf("Container %s state is invalid", stringid.TruncateID(cntr.ID))
return err
}
if isZombie {
return errdefs.System(errors.Errorf("container %s PID %d is zombie and can not be killed. Use the --init option when creating containers to run an init inside the container that forwards signals and reaps processes", stringid.TruncateID(cntr.ID), pid))
}
}
}
}
return nil
}
func isLinkable(child *container.Container) bool {
// A container is linkable only if it belongs to the default network
_, ok := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
return ok
}
func enableIPOnPredefinedNetwork() bool {
return false
}
// serviceDiscoveryOnDefaultNetwork indicates if service discovery is supported on the default network
func serviceDiscoveryOnDefaultNetwork() bool {
return false
}
func (daemon *Daemon) setupPathsAndSandboxOptions(container *container.Container, sboxOptions *[]libnetwork.SandboxOption) error {
var err error
// Set the correct paths for /etc/hosts and /etc/resolv.conf, based on the
// networking-mode of the container. Note that containers with "container"
// networking are already handled in "initializeNetworking()" before we reach
// this function, so do not have to be accounted for here.
switch {
case container.HostConfig.NetworkMode.IsHost():
// In host-mode networking, the container does not have its own networking
// namespace, so both `/etc/hosts` and `/etc/resolv.conf` should be the same
// as on the host itself. The container gets a copy of these files, but they
// may be symlinked, so resolve the original path first.
etcHosts, err := filepath.EvalSymlinks("/etc/hosts")
if err != nil {
return err
}
resolvConf, err := filepath.EvalSymlinks("/etc/resolv.conf")
if err != nil {
return err
}
*sboxOptions = append(
*sboxOptions,
libnetwork.OptionOriginHostsPath(etcHosts),
libnetwork.OptionOriginResolvConfPath(resolvConf),
)
case container.HostConfig.NetworkMode.IsUserDefined():
// The container uses a user-defined network. We use the embedded DNS
// server for container name resolution and to act as a DNS forwarder
// for external DNS resolution.
// We parse the DNS server(s) that are defined in /etc/resolv.conf on
// the host, which may be a local DNS server (for example, if DNSMasq or
// systemd-resolvd are in use). The embedded DNS server forwards DNS
// resolution to the DNS server configured on the host, which in itself
// may act as a forwarder for external DNS servers.
// If systemd-resolvd is used, the "upstream" DNS servers can be found in
// /run/systemd/resolve/resolv.conf. We do not query those DNS servers
// directly, as they can be dynamically reconfigured.
resolvConf, err := filepath.EvalSymlinks("/etc/resolv.conf")
if err != nil {
return err
}
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginResolvConfPath(resolvConf))
default:
// For other situations, such as the default bridge network, container
// discovery / name resolution is handled through /etc/hosts, and no
// embedded DNS server is available. Without the embedded DNS, we
// cannot use local DNS servers on the host (for example, if DNSMasq or
// systemd-resolvd is used). If systemd-resolvd is used, we try to
// determine the external DNS servers that are used on the host.
// This situation is not ideal, because DNS servers configured in the
// container are not updated after the container is created, but the
// DNS servers on the host can be dynamically updated.
//
// Copy the host's resolv.conf for the container (/run/systemd/resolve/resolv.conf or /etc/resolv.conf)
resolvConf, err := filepath.EvalSymlinks(daemon.configStore.GetResolvConf())
if err != nil {
return err
}
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginResolvConfPath(resolvConf))
}
container.HostsPath, err = container.GetRootResourcePath("hosts")
if err != nil {
return err
}
*sboxOptions = append(*sboxOptions, libnetwork.OptionHostsPath(container.HostsPath))
container.ResolvConfPath, err = container.GetRootResourcePath("resolv.conf")
if err != nil {
return err
}
*sboxOptions = append(*sboxOptions, libnetwork.OptionResolvConfPath(container.ResolvConfPath))
return nil
}
func (daemon *Daemon) initializeNetworkingPaths(container *container.Container, nc *container.Container) error {
container.HostnamePath = nc.HostnamePath
container.HostsPath = nc.HostsPath
container.ResolvConfPath = nc.ResolvConfPath
return nil
}
func (daemon *Daemon) setupContainerMountsRoot(c *container.Container) error {
// get the root mount path so we can make it unbindable
p, err := c.MountsResourcePath("")
if err != nil {
return err
}
return idtools.MkdirAllAndChown(p, 0700, daemon.idMapping.RootPair())
}