1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00
moby--moby/container/container_unix.go
Brian Goff eaa5192856 Make container resource mounts unbindable
It's a common scenario for admins and/or monitoring applications to
mount in the daemon root dir into a container. When doing so all mounts
get copied into the container, often with private references.
This can prevent removal of a container due to the various mounts that
must be configured before a container is started (for example, for
shared /dev/shm, or secrets) being leaked into another namespace,
usually with private references.

This is particularly problematic on older kernels (e.g. RHEL < 7.4)
where a mount may be active in another namespace and attempting to
remove a mountpoint which is active in another namespace fails.

This change moves all container resource mounts into a common directory
so that the directory can be made unbindable.
This prevents sub-mounts of this new directory from leaking
into other namespaces when mounted with `rbind`, which is how all
binds are handled for containers.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
2018-01-16 15:09:05 -05:00

498 lines
15 KiB
Go

// +build linux freebsd
package container
import (
"io/ioutil"
"os"
"github.com/docker/docker/api/types"
containertypes "github.com/docker/docker/api/types/container"
mounttypes "github.com/docker/docker/api/types/mount"
"github.com/docker/docker/pkg/chrootarchive"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/stringid"
"github.com/docker/docker/pkg/system"
"github.com/docker/docker/volume"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
const (
// DefaultStopTimeout is the timeout (in seconds) for the syscall signal used to stop a container.
DefaultStopTimeout = 10
// containerSecretMountPath is presumably the in-container directory under
// which secret files are exposed; it is not referenced in the code visible
// here, so confirm its use at the call sites.
containerSecretMountPath = "/run/secrets"
)
// TrySetNetworkMount records path as the backing file for one of the
// container's network configuration files when destination names such a
// file (/etc/resolv.conf, /etc/hostname or /etc/hosts). It reports whether
// destination was recognised; for any other destination nothing is changed
// and false is returned.
func (container *Container) TrySetNetworkMount(destination string, path string) bool {
	switch destination {
	case "/etc/resolv.conf":
		container.ResolvConfPath = path
	case "/etc/hostname":
		container.HostnamePath = path
	case "/etc/hosts":
		container.HostsPath = path
	default:
		return false
	}
	return true
}
// BuildHostnameFile resolves the location of the container's "hostname"
// resource, stores it in HostnamePath and writes the configured hostname
// followed by a newline to that file with mode 0644. An error from resolving
// the path or from writing the file is returned as is.
func (container *Container) BuildHostnameFile() error {
	resolved, err := container.GetRootResourcePath("hostname")
	if err != nil {
		return err
	}
	container.HostnamePath = resolved

	contents := []byte(container.Config.Hostname + "\n")
	return ioutil.WriteFile(resolved, contents, 0644)
}
// NetworkMounts returns the mounts for the container's network configuration
// files: /etc/resolv.conf, /etc/hostname and /etc/hosts, in that order.
//
// A file is skipped when its source path is empty or cannot be stat'ed (the
// latter is logged as a warning). When the user has configured a mount point
// at the same destination, that mount point's RW flag decides writability;
// otherwise the file is writable unless the root filesystem is read-only, and
// the source is relabeled with the container's mount label (the relabel
// result is not checked).
func (container *Container) NetworkMounts() []Mount {
	var result []Mount
	isShared := container.HostConfig.NetworkMode.IsContainer()
	parser := volume.NewParser(container.OS)

	// One entry per network file; each keeps its own warning format so the
	// emitted log lines stay exactly as before.
	candidates := []struct {
		source  string
		target  string
		warnFmt string
	}{
		{
			source:  container.ResolvConfPath,
			target:  "/etc/resolv.conf",
			warnFmt: "ResolvConfPath set to %q, but can't stat this filename (err = %v); skipping",
		},
		{
			source:  container.HostnamePath,
			target:  "/etc/hostname",
			warnFmt: "HostnamePath set to %q, but can't stat this filename (err = %v); skipping",
		},
		{
			source:  container.HostsPath,
			target:  "/etc/hosts",
			warnFmt: "HostsPath set to %q, but can't stat this filename (err = %v); skipping",
		},
	}

	for _, c := range candidates {
		if c.source == "" {
			continue
		}
		if _, err := os.Stat(c.source); err != nil {
			logrus.Warnf(c.warnFmt, c.source, err)
			continue
		}

		rw := !container.HostConfig.ReadonlyRootfs
		if userMount, ok := container.MountPoints[c.target]; ok {
			rw = userMount.RW
		} else {
			label.Relabel(c.source, container.MountLabel, isShared)
		}

		result = append(result, Mount{
			Source:      c.source,
			Destination: c.target,
			Writable:    rw,
			Propagation: string(parser.DefaultPropagationMode()),
		})
	}
	return result
}
// CopyImagePathContent copies what the container has at destination into the
// volume v.
//
// destination is resolved with GetResourcePath; if the resolved directory
// does not exist the call is a no-op. Otherwise the volume is mounted under a
// freshly generated ID, relabeled with the container's mount label (ENOTSUP
// from relabeling is tolerated), populated via copyExistingContents and
// unmounted again. A failure to unmount is only logged.
func (container *Container) CopyImagePathContent(v volume.Volume, destination string) error {
	imageDir, err := container.GetResourcePath(destination)
	if err != nil {
		return err
	}

	if _, readErr := ioutil.ReadDir(imageDir); os.IsNotExist(readErr) {
		return nil
	} else if readErr != nil {
		return readErr
	}

	mountID := stringid.GenerateNonCryptoID()
	volPath, err := v.Mount(mountID)
	if err != nil {
		return err
	}
	defer func() {
		if unmountErr := v.Unmount(mountID); unmountErr != nil {
			logrus.Warnf("error while unmounting volume %s: %v", v.Name(), unmountErr)
		}
	}()

	relabelErr := label.Relabel(volPath, container.MountLabel, true)
	if relabelErr != nil && relabelErr != unix.ENOTSUP {
		return relabelErr
	}
	return copyExistingContents(imageDir, volPath)
}
// ShmResourcePath returns the path of the container's "shm" entry, obtained
// by delegating to MountsResourcePath; any error from that call is returned
// unchanged.
func (container *Container) ShmResourcePath() (string, error) {
return container.MountsResourcePath("shm")
}
// HasMountFor reports whether path is the destination of one of the
// container's configured mount points or of one of its tmpfs mounts.
func (container *Container) HasMountFor(path string) bool {
	if _, ok := container.MountPoints[path]; ok {
		return true
	}
	// Tmpfs is keyed by destination, so a direct lookup gives the same answer
	// as comparing path against every key.
	_, ok := container.HostConfig.Tmpfs[path]
	return ok
}
// UnmountIpcMount unmounts the container's own shm directory using the
// supplied unmount function.
//
// Nothing is done when /dev/shm is covered by a mount point or tmpfs mount
// (see HasMountFor), or when the shm resource path is empty. An unmount error
// is reported, wrapped with the path, only if it is not a "does not exist"
// error and the path is either still mounted or its mount state could not be
// determined.
func (container *Container) UnmountIpcMount(unmount func(pth string) error) error {
	if container.HasMountFor("/dev/shm") {
		return nil
	}

	// container.ShmPath should not be used here as it may point
	// to the host's or other container's /dev/shm
	target, err := container.ShmResourcePath()
	switch {
	case err != nil:
		return err
	case target == "":
		return nil
	}

	unmountErr := unmount(target)
	if unmountErr == nil || os.IsNotExist(unmountErr) {
		return nil
	}

	stillMounted, checkErr := mount.Mounted(target)
	if stillMounted || checkErr != nil {
		return errors.Wrapf(unmountErr, "umount %s", target)
	}
	return nil
}
// IpcMounts returns the IPC mounts for the container: a single writable
// /dev/shm mount backed by container.ShmPath, or no mounts at all when
// /dev/shm is already covered by a mount point or tmpfs mount, or when
// ShmPath is empty. The source is labeled with the container's mount label
// before the mount is returned (the labeling result is not checked).
func (container *Container) IpcMounts() []Mount {
	parser := volume.NewParser(container.OS)

	if container.HasMountFor("/dev/shm") || container.ShmPath == "" {
		return nil
	}

	label.SetFileLabel(container.ShmPath, container.MountLabel)
	return []Mount{{
		Source:      container.ShmPath,
		Destination: "/dev/shm",
		Writable:    true,
		Propagation: string(parser.DefaultPropagationMode()),
	}}
}
// SecretMounts returns one read-only mount per secret reference that has a
// File target. The source comes from SecretFilePath and the destination from
// getSecretTargetPath. References without a File are ignored. The first
// SecretFilePath error aborts the call and no mounts are returned.
func (container *Container) SecretMounts() ([]Mount, error) {
	var result []Mount
	for _, ref := range container.SecretReferences {
		if ref.File != nil {
			source, err := container.SecretFilePath(*ref)
			if err != nil {
				return nil, err
			}
			result = append(result, Mount{
				Source:      source,
				Destination: getSecretTargetPath(ref),
				Writable:    false,
			})
		}
	}
	return result, nil
}
// UnmountSecrets recursively unmounts everything at the container's secret
// mount path. A path that does not exist is not an error; any other stat
// failure, or a failure to obtain the path, is returned.
func (container *Container) UnmountSecrets() error {
	secretsDir, err := container.SecretMountPath()
	if err != nil {
		return err
	}

	_, statErr := os.Stat(secretsDir)
	switch {
	case statErr == nil:
		return mount.RecursiveUnmount(secretsDir)
	case os.IsNotExist(statErr):
		return nil
	default:
		return statErr
	}
}
// ConfigMounts returns one read-only mount per config reference that has a
// File target. The source comes from ConfigFilePath and the destination is
// the reference's File.Name. References without a File are ignored. The
// first ConfigFilePath error aborts the call and no mounts are returned.
func (container *Container) ConfigMounts() ([]Mount, error) {
	var result []Mount
	for _, ref := range container.ConfigReferences {
		if ref.File != nil {
			source, err := container.ConfigFilePath(*ref)
			if err != nil {
				return nil, err
			}
			result = append(result, Mount{
				Source:      source,
				Destination: ref.File.Name,
				Writable:    false,
			})
		}
	}
	return result, nil
}
// conflictingUpdateOptions is an error whose message is the string value
// itself. UpdateContainer returns it when the requested settings clash with
// the container's current configuration.
type conflictingUpdateOptions string
// Error returns the message carried by the value.
func (e conflictingUpdateOptions) Error() string {
return string(e)
}
// Conflict is an empty marker method; presumably it lets callers classify
// this error as a conflict (confirm against the error-handling helpers).
func (e conflictingUpdateOptions) Conflict() {}
// UpdateContainer updates configuration of a container. Callers must hold a Lock on the Container.
//
// Only the non-zero / non-empty fields of hostConfig.Resources, and a
// restart policy with a non-empty name, are applied; everything else is left
// as it is. All conflict checks run before anything is modified, so when a
// conflictingUpdateOptions error is returned the container's HostConfig has
// not been touched.
func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfig) error {
	resources := hostConfig.Resources
	cResources := &container.HostConfig.Resources

	// validate NanoCPUs, CPUPeriod, and CPUQuota
	// Because NanoCPU effectively updates CPUPeriod/CPUQuota,
	// once NanoCPU is already set, updating CPUPeriod/CPUQuota will be blocked, and vice versa.
	// In the following we make sure the intended update (resources) does not conflict with the existing (cResource).
	if resources.NanoCPUs > 0 && cResources.CPUPeriod > 0 {
		return conflictingUpdateOptions("Conflicting options: Nano CPUs cannot be updated as CPU Period has already been set")
	}
	if resources.NanoCPUs > 0 && cResources.CPUQuota > 0 {
		return conflictingUpdateOptions("Conflicting options: Nano CPUs cannot be updated as CPU Quota has already been set")
	}
	if resources.CPUPeriod > 0 && cResources.NanoCPUs > 0 {
		return conflictingUpdateOptions("Conflicting options: CPU Period cannot be updated as NanoCPUs has already been set")
	}
	if resources.CPUQuota > 0 && cResources.NanoCPUs > 0 {
		return conflictingUpdateOptions("Conflicting options: CPU Quota cannot be updated as NanoCPUs has already been set")
	}

	// A new memory limit must not exceed a swap limit that is already in
	// place unless the swap limit is updated in the same request. Only a
	// positive existing swap limit is a real bound: 0 means it was never set
	// and a negative value (Docker documents -1 as unlimited swap) cannot be
	// exceeded, so neither may block a memory update.
	if resources.Memory != 0 && resources.MemorySwap == 0 &&
		cResources.MemorySwap > 0 && resources.Memory > cResources.MemorySwap {
		return conflictingUpdateOptions("Memory limit should be smaller than already set memoryswap limit, update the memoryswap at the same time")
	}

	// A restart policy other than "none" cannot be combined with AutoRemove.
	updateRestartPolicy := hostConfig.RestartPolicy.Name != ""
	if updateRestartPolicy && container.HostConfig.AutoRemove && !hostConfig.RestartPolicy.IsNone() {
		return conflictingUpdateOptions("Restart policy cannot be updated because AutoRemove is enabled for the container")
	}

	// Everything validated: apply the requested resource settings.
	if resources.BlkioWeight != 0 {
		cResources.BlkioWeight = resources.BlkioWeight
	}
	if resources.CPUShares != 0 {
		cResources.CPUShares = resources.CPUShares
	}
	if resources.NanoCPUs != 0 {
		cResources.NanoCPUs = resources.NanoCPUs
	}
	if resources.CPUPeriod != 0 {
		cResources.CPUPeriod = resources.CPUPeriod
	}
	if resources.CPUQuota != 0 {
		cResources.CPUQuota = resources.CPUQuota
	}
	if resources.CpusetCpus != "" {
		cResources.CpusetCpus = resources.CpusetCpus
	}
	if resources.CpusetMems != "" {
		cResources.CpusetMems = resources.CpusetMems
	}
	if resources.Memory != 0 {
		cResources.Memory = resources.Memory
	}
	if resources.MemorySwap != 0 {
		cResources.MemorySwap = resources.MemorySwap
	}
	if resources.MemoryReservation != 0 {
		cResources.MemoryReservation = resources.MemoryReservation
	}
	if resources.KernelMemory != 0 {
		cResources.KernelMemory = resources.KernelMemory
	}
	if resources.CPURealtimePeriod != 0 {
		cResources.CPURealtimePeriod = resources.CPURealtimePeriod
	}
	if resources.CPURealtimeRuntime != 0 {
		cResources.CPURealtimeRuntime = resources.CPURealtimeRuntime
	}

	// update HostConfig of container
	if updateRestartPolicy {
		container.HostConfig.RestartPolicy = hostConfig.RestartPolicy
	}
	return nil
}
// DetachAndUnmount lazily detaches every mount destination of the container
// (its configured mount points first, then its network mounts) and finally
// unmounts the volumes through UnmountVolumes, whose result is returned.
// Destinations that cannot be resolved, and detach failures, are only logged
// as warnings.
// This is used from daemon/archive for `docker cp`
func (container *Container) DetachAndUnmount(volumeEventLog func(name, action string, attributes map[string]string)) error {
	netMounts := container.NetworkMounts()
	targets := make([]string, 0, len(container.MountPoints)+len(netMounts))

	// collect resolves a destination inside the container and queues it for
	// detaching; unresolved destinations are skipped with a warning.
	collect := func(destination string) {
		resolved, err := container.GetResourcePath(destination)
		if err != nil {
			logrus.Warnf("Failed to get volume destination path for container '%s' at '%s' while lazily unmounting: %v", container.ID, destination, err)
			return
		}
		targets = append(targets, resolved)
	}

	for _, mp := range container.MountPoints {
		collect(mp.Destination)
	}
	for _, nm := range netMounts {
		collect(nm.Destination)
	}

	for _, target := range targets {
		if err := detachMounted(target); err != nil {
			logrus.Warnf("%s unmountVolumes: Failed to do lazy umount fo volume '%s': %v", container.ID, target, err)
		}
	}
	return container.UnmountVolumes(volumeEventLog)
}
// copyExistingContents populates destination from source and then aligns
// destination's ownership and mode with source via copyOwnership.
//
// The contents are copied only when source has at least one entry and
// destination has none; destination is not even listed when source is empty.
func copyExistingContents(source, destination string) error {
	srcEntries, err := ioutil.ReadDir(source)
	if err != nil {
		return err
	}

	populate := false
	if len(srcEntries) > 0 {
		dstEntries, err := ioutil.ReadDir(destination)
		if err != nil {
			return err
		}
		// Never copy over a destination that already holds data.
		populate = len(dstEntries) == 0
	}

	if populate {
		if err := chrootarchive.NewArchiver(nil).CopyWithTar(source, destination); err != nil {
			return err
		}
	}
	return copyOwnership(source, destination)
}
// copyOwnership makes destination's uid:gid and mode match those of source.
// Each of the two changes is attempted only when the values actually differ.
func copyOwnership(source, destination string) error {
	srcInfo, err := system.Stat(source)
	if err != nil {
		return err
	}
	dstInfo, err := system.Stat(destination)
	if err != nil {
		return err
	}

	// Chown-ing to the UID/GID a file already has would be a no-op, yet it
	// can still fail with permission denied (for example on an NFS mount).
	// That is not a real error, so skip the call when the owner matches.
	sameOwner := srcInfo.UID() == dstInfo.UID() && srcInfo.GID() == dstInfo.GID()
	if !sameOwner {
		if err := os.Chown(destination, int(srcInfo.UID()), int(srcInfo.GID())); err != nil {
			return err
		}
	}

	if srcInfo.Mode() == dstInfo.Mode() {
		return nil
	}
	// NOTE(review): the stat mode is converted to os.FileMode as is; confirm
	// that setuid/setgid/sticky bits survive this conversion if they matter.
	return os.Chmod(destination, os.FileMode(srcInfo.Mode()))
}
// TmpfsMounts returns the container's tmpfs mounts: one per entry of
// HostConfig.Tmpfs (destination mapped to its option string), followed by one
// per mount point of type tmpfs, whose options are derived from the mount
// spec. An error converting a mount spec's options aborts the call and no
// mounts are returned.
func (container *Container) TmpfsMounts() ([]Mount, error) {
	parser := volume.NewParser(container.OS)

	var result []Mount
	for target, options := range container.HostConfig.Tmpfs {
		result = append(result, Mount{
			Source:      "tmpfs",
			Destination: target,
			Data:        options,
		})
	}

	for target, mp := range container.MountPoints {
		if mp.Type != mounttypes.TypeTmpfs {
			continue
		}
		options, err := parser.ConvertTmpfsOptions(mp.Spec.TmpfsOptions, mp.Spec.ReadOnly)
		if err != nil {
			return nil, err
		}
		result = append(result, Mount{
			Source:      "tmpfs",
			Destination: target,
			Data:        options,
		})
	}
	return result, nil
}
// EnableServiceDiscoveryOnDefaultNetwork reports whether service discovery
// should be enabled on the default network. This implementation always
// returns false.
func (container *Container) EnableServiceDiscoveryOnDefaultNetwork() bool {
return false
}
// GetMountPoints converts the container's mount points into their
// types.MountPoint form, one element per mount point, with Source taken from
// the mount point's Path(). The result is never nil. Callers must hold a
// Container lock.
func (container *Container) GetMountPoints() []types.MountPoint {
	result := make([]types.MountPoint, len(container.MountPoints))
	next := 0
	for _, mp := range container.MountPoints {
		result[next] = types.MountPoint{
			Type:        mp.Type,
			Name:        mp.Name,
			Source:      mp.Path(),
			Destination: mp.Destination,
			Driver:      mp.Driver,
			Mode:        mp.Mode,
			RW:          mp.RW,
			Propagation: mp.Propagation,
		}
		next++
	}
	return result
}