From 11cf394e5ea964636294a219872b188fe5bdf4dd Mon Sep 17 00:00:00 2001 From: Brian Goff Date: Mon, 26 Jun 2017 14:54:14 -0400 Subject: [PATCH] Make plugin removes more resilient to failure Before this patch, if the plugin's `config.json` is successfully removed but the main plugin state dir could not be removed for some reason (e.g. leaked mount), it will prevent the daemon from being able to be restarted. This patches changes this to atomically remove the plugin such that on daemon restart we can detect that there was an error and re-try. It also changes the logic so that it only logs errors on restore rather than erroring out the daemon. This also removes some code which is now duplicated elsewhere. Signed-off-by: Brian Goff --- integration-cli/docker_cli_daemon_test.go | 46 +++++++++++++++++++++++ plugin/backend_linux.go | 39 ++++++------------- plugin/manager.go | 23 +++++++++++- plugin/manager_linux.go | 2 +- 4 files changed, 81 insertions(+), 29 deletions(-) diff --git a/integration-cli/docker_cli_daemon_test.go b/integration-cli/docker_cli_daemon_test.go index 9046fe6ea6..2a295c946e 100644 --- a/integration-cli/docker_cli_daemon_test.go +++ b/integration-cli/docker_cli_daemon_test.go @@ -5,6 +5,7 @@ package main import ( "bufio" "bytes" + "context" "encoding/json" "fmt" "io" @@ -25,6 +26,9 @@ import ( "crypto/x509" "github.com/cloudflare/cfssl/helpers" + "github.com/docker/docker/api" + "github.com/docker/docker/api/types" + "github.com/docker/docker/client" "github.com/docker/docker/integration-cli/checker" "github.com/docker/docker/integration-cli/cli" "github.com/docker/docker/integration-cli/daemon" @@ -2980,3 +2984,45 @@ func (s *DockerDaemonSuite) TestShmSizeReload(c *check.C) { c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) c.Assert(strings.TrimSpace(out), check.Equals, fmt.Sprintf("%v", size)) } + +// TestFailedPluginRemove makes sure that a failed plugin remove does not block +// the daemon from starting +func (s *DockerDaemonSuite) TestFailedPluginRemove(c *check.C) { + testRequires(c, DaemonIsLinux, IsAmd64, SameHostDaemon) + d := daemon.New(c, dockerBinary, dockerdBinary, daemon.Config{}) + d.Start(c) + cli, err := client.NewClient(d.Sock(), api.DefaultVersion, nil, nil) + c.Assert(err, checker.IsNil) + + ctx, cancel := context.WithTimeout(context.Background(), 300*time.Second) + defer cancel() + + name := "test-plugin-rm-fail" + out, err := cli.PluginInstall(ctx, name, types.PluginInstallOptions{ + Disabled: true, + AcceptAllPermissions: true, + RemoteRef: "cpuguy83/docker-logdriver-test", + }) + c.Assert(err, checker.IsNil) + defer out.Close() + io.Copy(ioutil.Discard, out) + + ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + p, _, err := cli.PluginInspectWithRaw(ctx, name) + c.Assert(err, checker.IsNil) + + // simulate a bad/partial removal by removing the plugin config. + configPath := filepath.Join(d.Root, "plugins", p.ID, "config.json") + c.Assert(os.Remove(configPath), checker.IsNil) + + d.Restart(c) + ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + _, err = cli.Ping(ctx) + c.Assert(err, checker.IsNil) + + _, _, err = cli.PluginInspectWithRaw(ctx, name) + // plugin should be gone since the config.json is gone + c.Assert(err, checker.NotNil) +} diff --git a/plugin/backend_linux.go b/plugin/backend_linux.go index c2baab5c94..b22cc155d2 100644 --- a/plugin/backend_linux.go +++ b/plugin/backend_linux.go @@ -13,7 +13,6 @@ import ( "os" "path" "path/filepath" - "sort" "strings" "github.com/Sirupsen/logrus" @@ -32,6 +31,7 @@ import ( "github.com/docker/docker/pkg/mount" "github.com/docker/docker/pkg/pools" "github.com/docker/docker/pkg/progress" + "github.com/docker/docker/pkg/system" "github.com/docker/docker/plugin/v2" refstore "github.com/docker/docker/reference" "github.com/opencontainers/go-digest" @@ -624,14 +624,20 @@ func (pm *Manager) Remove(name string, config *types.PluginRmConfig) error { }() id := p.GetID() - pm.config.Store.Remove(p) pluginDir := filepath.Join(pm.config.Root, id) - if err := recursiveUnmount(pluginDir); err != nil { - logrus.WithField("dir", pluginDir).WithField("id", id).Warn(err) + + if err := mount.RecursiveUnmount(pluginDir); err != nil { + return errors.Wrap(err, "error unmounting plugin data") } - if err := os.RemoveAll(pluginDir); err != nil { - logrus.Warnf("unable to remove %q from plugin remove: %v", pluginDir, err) + + if err := os.Rename(pluginDir, pluginDir+"-removing"); err != nil { + return errors.Wrap(err, "error performing atomic remove of plugin dir") } + + if err := system.EnsureRemoveAll(pluginDir); err != nil { + return errors.Wrap(err, "error removing plugin dir") + } + pm.config.Store.Remove(p) pm.config.LogPluginEvent(id, name, "remove") return nil } @@ -652,27 +658,6 @@ func getMounts(root string) ([]string, error) { return mounts, nil } -func recursiveUnmount(root string) error { - mounts, err := getMounts(root) - if err != nil { - return err - } - - // sort in reverse-lexicographic order so the root mount will always be last - sort.Sort(sort.Reverse(sort.StringSlice(mounts))) - - for i, m := range mounts { - if err := mount.Unmount(m); err != nil { - if i == len(mounts)-1 { - return errors.Wrapf(err, "error performing recursive unmount on %s", root) - } - logrus.WithError(err).WithField("mountpoint", m).Warn("could not unmount") - } - } - - return nil -} - // Set sets plugin args func (pm *Manager) Set(name string, args []string) error { p, err := pm.config.Store.GetV2Plugin(name) diff --git a/plugin/manager.go b/plugin/manager.go index 02d372741f..580152a930 100644 --- a/plugin/manager.go +++ b/plugin/manager.go @@ -163,6 +163,19 @@ func (pm *Manager) StateChanged(id string, e libcontainerd.StateInfo) error { return nil } +func handleLoadError(err error, id string) { + if err == nil { + return + } + logger := logrus.WithError(err).WithField("id", id) + if os.IsNotExist(errors.Cause(err)) { + // Likely some error while removing on an older version of docker + logger.Warn("missing plugin config, skipping: this may be caused due to a failed remove and requires manual cleanup.") + return + } + logger.Error("error loading plugin, skipping") +} + func (pm *Manager) reload() error { // todo: restore dir, err := ioutil.ReadDir(pm.config.Root) if err != nil { @@ -173,9 +186,17 @@ func (pm *Manager) reload() error { // todo: restore if validFullID.MatchString(v.Name()) { p, err := pm.loadPlugin(v.Name()) if err != nil { - return err + handleLoadError(err, v.Name()) + continue } plugins[p.GetID()] = p + } else { + if validFullID.MatchString(strings.TrimSuffix(v.Name(), "-removing")) { + // There was likely some error while removing this plugin, let's try to remove again here + if err := system.EnsureRemoveAll(v.Name()); err != nil { + logrus.WithError(err).WithField("id", v.Name()).Warn("error while attempting to clean up previously removed plugin") + } + } } } diff --git a/plugin/manager_linux.go b/plugin/manager_linux.go index 1e8994043f..678be84b3f 100644 --- a/plugin/manager_linux.go +++ b/plugin/manager_linux.go @@ -204,7 +204,7 @@ func (pm *Manager) upgradePlugin(p *v2.Plugin, configDigest digest.Digest, blobs // Make sure nothing is mounted // This could happen if the plugin was disabled with `-f` with active mounts. // If there is anything in `orig` is still mounted, this should error out. - if err := recursiveUnmount(orig); err != nil { + if err := mount.RecursiveUnmount(orig); err != nil { return err }