From 589a0afa8cbe39b6512662fd1705873e2d236dd0 Mon Sep 17 00:00:00 2001 From: Brian Goff Date: Thu, 18 Jan 2018 16:55:27 -0500 Subject: [PATCH] Use rslave propagation for mounts from daemon root By default, if a user requests a bind mount, it uses private propagation. When the source path is a path within the daemon root, this, along with some other propagation values that the user can use, causes issues when the daemon tries to remove a mountpoint, because a container will then have a private reference to that mount which prevents removal. Unmounting with MNT_DETACH can help this scenario on newer kernels, but ultimately this is just covering up the problem and doesn't actually free up the underlying resources until all references are destroyed. This change does essentially 2 things: 1. Changes the default propagation when unspecified to `rslave` when the source path is within the daemon root path or a parent of the daemon root (because everything is using rbinds). 2. Creates a validation error on create when the user tries to specify an unacceptable propagation mode for these paths... basically the only two acceptable modes are `rslave` and `rshared`. In cases where we have used the new default propagation but the underlying filesystem is not set up to handle it (fs must have at least rshared propagation), instead of erroring out like we normally would, this falls back to the old default mode of `private`, which preserves backwards compatibility. 
Signed-off-by: Brian Goff --- daemon/oci_linux.go | 34 +++++- daemon/volumes.go | 15 +++ daemon/volumes_linux.go | 36 ++++++ daemon/volumes_linux_test.go | 56 +++++++++ daemon/volumes_windows.go | 5 + integration/container/mounts_linux_test.go | 136 ++++++++++++++++++++- 6 files changed, 272 insertions(+), 10 deletions(-) create mode 100644 daemon/volumes_linux.go create mode 100644 daemon/volumes_linux_test.go diff --git a/daemon/oci_linux.go b/daemon/oci_linux.go index dbc26e8efe..87a22d50eb 100644 --- a/daemon/oci_linux.go +++ b/daemon/oci_linux.go @@ -604,7 +604,8 @@ func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []c // // For private volumes any root propagation value should work. pFlag := mountPropagationMap[m.Propagation] - if pFlag == mount.SHARED || pFlag == mount.RSHARED { + switch pFlag { + case mount.SHARED, mount.RSHARED: if err := ensureShared(m.Source); err != nil { return err } @@ -612,13 +613,34 @@ func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []c if rootpg != mount.SHARED && rootpg != mount.RSHARED { s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED] } - } else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE { + case mount.SLAVE, mount.RSLAVE: + var fallback bool if err := ensureSharedOrSlave(m.Source); err != nil { - return err + // For backwards compatability purposes, treat mounts from the daemon root + // as special since we automatically add rslave propagation to these mounts + // when the user did not set anything, so we should fallback to the old + // behavior which is to use private propagation which is normally the + // default. 
+ if !strings.HasPrefix(m.Source, daemon.root) && !strings.HasPrefix(daemon.root, m.Source) { + return err + } + + cm, ok := c.MountPoints[m.Destination] + if !ok { + return err + } + if cm.Spec.BindOptions != nil && cm.Spec.BindOptions.Propagation != "" { + // This means the user explicitly set a propagation, do not fallback in that case. + return err + } + fallback = true + logrus.WithField("container", c.ID).WithField("source", m.Source).Warn("Falling back to default propagation for bind source in daemon root") } - rootpg := mountPropagationMap[s.Linux.RootfsPropagation] - if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE { - s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE] + if !fallback { + rootpg := mountPropagationMap[s.Linux.RootfsPropagation] + if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE { + s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE] + } } } diff --git a/daemon/volumes.go b/daemon/volumes.go index 2e75feebda..7833ea2200 100644 --- a/daemon/volumes.go +++ b/daemon/volumes.go @@ -10,6 +10,7 @@ import ( "github.com/docker/docker/api/types" containertypes "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/mount" mounttypes "github.com/docker/docker/api/types/mount" "github.com/docker/docker/container" "github.com/docker/docker/errdefs" @@ -146,6 +147,13 @@ func (daemon *Daemon) registerMountPoints(container *container.Container, hostCo if err != nil { return err } + needsSlavePropagation, err := daemon.validateBindDaemonRoot(bind.Spec) + if err != nil { + return err + } + if needsSlavePropagation { + bind.Propagation = mount.PropagationRSlave + } // #10618 _, tmpfsExists := hostConfig.Tmpfs[bind.Destination] @@ -178,6 +186,13 @@ func (daemon *Daemon) registerMountPoints(container *container.Container, hostCo if err != nil { return errdefs.InvalidParameter(err) } + 
needsSlavePropagation, err := daemon.validateBindDaemonRoot(mp.Spec) + if err != nil { + return err + } + if needsSlavePropagation { + mp.Propagation = mount.PropagationRSlave + } if binds[mp.Destination] { return duplicateMountPointError(cfg.Target) diff --git a/daemon/volumes_linux.go b/daemon/volumes_linux.go new file mode 100644 index 0000000000..cf3d9ed159 --- /dev/null +++ b/daemon/volumes_linux.go @@ -0,0 +1,36 @@ +package daemon + +import ( + "strings" + + "github.com/docker/docker/api/types/mount" + "github.com/docker/docker/errdefs" + "github.com/pkg/errors" +) + +// validateBindDaemonRoot ensures that if a given mountpoint's source is within +// the daemon root path, that the propagation is setup to prevent a container +// from holding private refereneces to a mount within the daemon root, which +// can cause issues when the daemon attempts to remove the mountpoint. +func (daemon *Daemon) validateBindDaemonRoot(m mount.Mount) (bool, error) { + if m.Type != mount.TypeBind { + return false, nil + } + + // check if the source is within the daemon root, or if the daemon root is within the source + if !strings.HasPrefix(m.Source, daemon.root) && !strings.HasPrefix(daemon.root, m.Source) { + return false, nil + } + + if m.BindOptions == nil { + return true, nil + } + + switch m.BindOptions.Propagation { + case mount.PropagationRSlave, mount.PropagationRShared, "": + return m.BindOptions.Propagation == "", nil + default: + } + + return false, errdefs.InvalidParameter(errors.Errorf(`invalid mount config: must use either propagation mode "rslave" or "rshared" when mount source is within the daemon root, daemon root: %q, bind mount source: %q, propagation: %q`, daemon.root, m.Source, m.BindOptions.Propagation)) +} diff --git a/daemon/volumes_linux_test.go b/daemon/volumes_linux_test.go new file mode 100644 index 0000000000..72830c3e81 --- /dev/null +++ b/daemon/volumes_linux_test.go @@ -0,0 +1,56 @@ +package daemon + +import ( + "path/filepath" + "testing" + + 
"github.com/docker/docker/api/types/mount" +) + +func TestBindDaemonRoot(t *testing.T) { + t.Parallel() + d := &Daemon{root: "/a/b/c/daemon"} + for _, test := range []struct { + desc string + opts *mount.BindOptions + needsProp bool + err bool + }{ + {desc: "nil propagation settings", opts: nil, needsProp: true, err: false}, + {desc: "empty propagation settings", opts: &mount.BindOptions{}, needsProp: true, err: false}, + {desc: "private propagation", opts: &mount.BindOptions{Propagation: mount.PropagationPrivate}, err: true}, + {desc: "rprivate propagation", opts: &mount.BindOptions{Propagation: mount.PropagationRPrivate}, err: true}, + {desc: "slave propagation", opts: &mount.BindOptions{Propagation: mount.PropagationSlave}, err: true}, + {desc: "rslave propagation", opts: &mount.BindOptions{Propagation: mount.PropagationRSlave}, err: false, needsProp: false}, + {desc: "shared propagation", opts: &mount.BindOptions{Propagation: mount.PropagationShared}, err: true}, + {desc: "rshared propagation", opts: &mount.BindOptions{Propagation: mount.PropagationRSlave}, err: false, needsProp: false}, + } { + t.Run(test.desc, func(t *testing.T) { + test := test + for desc, source := range map[string]string{ + "source is root": d.root, + "source is subpath": filepath.Join(d.root, "a", "b"), + "source is parent": filepath.Dir(d.root), + "source is /": "/", + } { + t.Run(desc, func(t *testing.T) { + mount := mount.Mount{ + Type: mount.TypeBind, + Source: source, + BindOptions: test.opts, + } + needsProp, err := d.validateBindDaemonRoot(mount) + if (err != nil) != test.err { + t.Fatalf("expected err=%v, got: %v", test.err, err) + } + if test.err { + return + } + if test.needsProp != needsProp { + t.Fatalf("expected needsProp=%v, got: %v", test.needsProp, needsProp) + } + }) + } + }) + } +} diff --git a/daemon/volumes_windows.go b/daemon/volumes_windows.go index bfb5133d3d..aced2665e0 100644 --- a/daemon/volumes_windows.go +++ b/daemon/volumes_windows.go @@ -3,6 +3,7 @@ package 
daemon import ( "sort" + "github.com/docker/docker/api/types/mount" "github.com/docker/docker/container" "github.com/docker/docker/pkg/idtools" "github.com/docker/docker/volume" @@ -44,3 +45,7 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er func setBindModeIfNull(bind *volume.MountPoint) { return } + +func (daemon *Daemon) validateBindDaemonRoot(m mount.Mount) (bool, error) { + return false, nil +} diff --git a/integration/container/mounts_linux_test.go b/integration/container/mounts_linux_test.go index eab0fd5d74..368234f708 100644 --- a/integration/container/mounts_linux_test.go +++ b/integration/container/mounts_linux_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "fmt" + "path/filepath" "testing" "github.com/docker/docker/api/types" @@ -12,6 +13,7 @@ import ( "github.com/docker/docker/api/types/network" "github.com/docker/docker/client" "github.com/docker/docker/integration-cli/daemon" + "github.com/docker/docker/integration/util/request" "github.com/docker/docker/pkg/stdcopy" "github.com/docker/docker/pkg/system" "github.com/gotestyourself/gotestyourself/fs" @@ -51,10 +53,10 @@ func TestContainerShmNoLeak(t *testing.T) { hc := container.HostConfig{ Mounts: []mount.Mount{ { - Type: mount.TypeBind, - Source: d.Root, - Target: "/testdaemonroot", - BindOptions: &mount.BindOptions{Propagation: mount.PropagationRPrivate}}, + Type: mount.TypeBind, + Source: d.Root, + Target: "/testdaemonroot", + }, }, } cfg.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("mount | grep testdaemonroot | grep containers | grep %s", ctr.ID)} @@ -141,3 +143,129 @@ func TestContainerNetworkMountsNoChown(t *testing.T) { require.NoError(t, err) assert.Equal(t, uint32(0), statT.UID(), "bind mounted network file should not change ownership from root") } + +func TestMountDaemonRoot(t *testing.T) { + t.Parallel() + + client := request.NewAPIClient(t) + ctx := context.Background() + info, err := client.Info(ctx) + if err != nil { + t.Fatal(err) + } + + for _, 
test := range []struct { + desc string + propagation mount.Propagation + expected mount.Propagation + }{ + { + desc: "default", + propagation: "", + expected: mount.PropagationRSlave, + }, + { + desc: "private", + propagation: mount.PropagationPrivate, + }, + { + desc: "rprivate", + propagation: mount.PropagationRPrivate, + }, + { + desc: "slave", + propagation: mount.PropagationSlave, + }, + { + desc: "rslave", + propagation: mount.PropagationRSlave, + expected: mount.PropagationRSlave, + }, + { + desc: "shared", + propagation: mount.PropagationShared, + }, + { + desc: "rshared", + propagation: mount.PropagationRShared, + expected: mount.PropagationRShared, + }, + } { + t.Run(test.desc, func(t *testing.T) { + test := test + t.Parallel() + + propagationSpec := fmt.Sprintf(":%s", test.propagation) + if test.propagation == "" { + propagationSpec = "" + } + bindSpecRoot := info.DockerRootDir + ":" + "/foo" + propagationSpec + bindSpecSub := filepath.Join(info.DockerRootDir, "containers") + ":/foo" + propagationSpec + + for name, hc := range map[string]*container.HostConfig{ + "bind root": {Binds: []string{bindSpecRoot}}, + "bind subpath": {Binds: []string{bindSpecSub}}, + "mount root": { + Mounts: []mount.Mount{ + { + Type: mount.TypeBind, + Source: info.DockerRootDir, + Target: "/foo", + BindOptions: &mount.BindOptions{Propagation: test.propagation}, + }, + }, + }, + "mount subpath": { + Mounts: []mount.Mount{ + { + Type: mount.TypeBind, + Source: filepath.Join(info.DockerRootDir, "containers"), + Target: "/foo", + BindOptions: &mount.BindOptions{Propagation: test.propagation}, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + hc := hc + t.Parallel() + + c, err := client.ContainerCreate(ctx, &container.Config{ + Image: "busybox", + Cmd: []string{"true"}, + }, hc, nil, "") + + if err != nil { + if test.expected != "" { + t.Fatal(err) + } + // expected an error, so this is ok and should not continue + return + } + if test.expected == "" { + t.Fatal("expected 
create to fail") + } + + defer func() { + if err := client.ContainerRemove(ctx, c.ID, types.ContainerRemoveOptions{Force: true}); err != nil { + panic(err) + } + }() + + inspect, err := client.ContainerInspect(ctx, c.ID) + if err != nil { + t.Fatal(err) + } + if len(inspect.Mounts) != 1 { + t.Fatalf("unexpected number of mounts: %+v", inspect.Mounts) + } + + m := inspect.Mounts[0] + if m.Propagation != test.expected { + t.Fatalf("got unexpected propagation mode, expected %q, got: %v", test.expected, m.Propagation) + } + }) + } + }) + } +}