mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Make plugin removes more resilient to failure
Before this patch, if the plugin's `config.json` is successfully removed but the main plugin state dir could not be removed for some reason (e.g. leaked mount), it will prevent the daemon from being able to be restarted. This patches changes this to atomically remove the plugin such that on daemon restart we can detect that there was an error and re-try. It also changes the logic so that it only logs errors on restore rather than erroring out the daemon. This also removes some code which is now duplicated elsewhere. Signed-off-by: Brian Goff <cpuguy83@gmail.com>
This commit is contained in:
parent
8b29ba59cc
commit
11cf394e5e
4 changed files with 81 additions and 29 deletions
|
@ -5,6 +5,7 @@ package main
|
|||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
|
@ -25,6 +26,9 @@ import (
|
|||
"crypto/x509"
|
||||
|
||||
"github.com/cloudflare/cfssl/helpers"
|
||||
"github.com/docker/docker/api"
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/client"
|
||||
"github.com/docker/docker/integration-cli/checker"
|
||||
"github.com/docker/docker/integration-cli/cli"
|
||||
"github.com/docker/docker/integration-cli/daemon"
|
||||
|
@ -2980,3 +2984,45 @@ func (s *DockerDaemonSuite) TestShmSizeReload(c *check.C) {
|
|||
c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
|
||||
c.Assert(strings.TrimSpace(out), check.Equals, fmt.Sprintf("%v", size))
|
||||
}
|
||||
|
||||
// TestFailedPluginRemove makes sure that a failed plugin remove does not block
|
||||
// the daemon from starting
|
||||
func (s *DockerDaemonSuite) TestFailedPluginRemove(c *check.C) {
|
||||
testRequires(c, DaemonIsLinux, IsAmd64, SameHostDaemon)
|
||||
d := daemon.New(c, dockerBinary, dockerdBinary, daemon.Config{})
|
||||
d.Start(c)
|
||||
cli, err := client.NewClient(d.Sock(), api.DefaultVersion, nil, nil)
|
||||
c.Assert(err, checker.IsNil)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 300*time.Second)
|
||||
defer cancel()
|
||||
|
||||
name := "test-plugin-rm-fail"
|
||||
out, err := cli.PluginInstall(ctx, name, types.PluginInstallOptions{
|
||||
Disabled: true,
|
||||
AcceptAllPermissions: true,
|
||||
RemoteRef: "cpuguy83/docker-logdriver-test",
|
||||
})
|
||||
c.Assert(err, checker.IsNil)
|
||||
defer out.Close()
|
||||
io.Copy(ioutil.Discard, out)
|
||||
|
||||
ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
p, _, err := cli.PluginInspectWithRaw(ctx, name)
|
||||
c.Assert(err, checker.IsNil)
|
||||
|
||||
// simulate a bad/partial removal by removing the plugin config.
|
||||
configPath := filepath.Join(d.Root, "plugins", p.ID, "config.json")
|
||||
c.Assert(os.Remove(configPath), checker.IsNil)
|
||||
|
||||
d.Restart(c)
|
||||
ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
_, err = cli.Ping(ctx)
|
||||
c.Assert(err, checker.IsNil)
|
||||
|
||||
_, _, err = cli.PluginInspectWithRaw(ctx, name)
|
||||
// plugin should be gone since the config.json is gone
|
||||
c.Assert(err, checker.NotNil)
|
||||
}
|
||||
|
|
|
@ -13,7 +13,6 @@ import (
|
|||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
|
@ -32,6 +31,7 @@ import (
|
|||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/docker/docker/pkg/pools"
|
||||
"github.com/docker/docker/pkg/progress"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
"github.com/docker/docker/plugin/v2"
|
||||
refstore "github.com/docker/docker/reference"
|
||||
"github.com/opencontainers/go-digest"
|
||||
|
@ -624,14 +624,20 @@ func (pm *Manager) Remove(name string, config *types.PluginRmConfig) error {
|
|||
}()
|
||||
|
||||
id := p.GetID()
|
||||
pm.config.Store.Remove(p)
|
||||
pluginDir := filepath.Join(pm.config.Root, id)
|
||||
if err := recursiveUnmount(pluginDir); err != nil {
|
||||
logrus.WithField("dir", pluginDir).WithField("id", id).Warn(err)
|
||||
|
||||
if err := mount.RecursiveUnmount(pluginDir); err != nil {
|
||||
return errors.Wrap(err, "error unmounting plugin data")
|
||||
}
|
||||
if err := os.RemoveAll(pluginDir); err != nil {
|
||||
logrus.Warnf("unable to remove %q from plugin remove: %v", pluginDir, err)
|
||||
|
||||
if err := os.Rename(pluginDir, pluginDir+"-removing"); err != nil {
|
||||
return errors.Wrap(err, "error performing atomic remove of plugin dir")
|
||||
}
|
||||
|
||||
if err := system.EnsureRemoveAll(pluginDir); err != nil {
|
||||
return errors.Wrap(err, "error removing plugin dir")
|
||||
}
|
||||
pm.config.Store.Remove(p)
|
||||
pm.config.LogPluginEvent(id, name, "remove")
|
||||
return nil
|
||||
}
|
||||
|
@ -652,27 +658,6 @@ func getMounts(root string) ([]string, error) {
|
|||
return mounts, nil
|
||||
}
|
||||
|
||||
func recursiveUnmount(root string) error {
|
||||
mounts, err := getMounts(root)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// sort in reverse-lexicographic order so the root mount will always be last
|
||||
sort.Sort(sort.Reverse(sort.StringSlice(mounts)))
|
||||
|
||||
for i, m := range mounts {
|
||||
if err := mount.Unmount(m); err != nil {
|
||||
if i == len(mounts)-1 {
|
||||
return errors.Wrapf(err, "error performing recursive unmount on %s", root)
|
||||
}
|
||||
logrus.WithError(err).WithField("mountpoint", m).Warn("could not unmount")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set sets plugin args
|
||||
func (pm *Manager) Set(name string, args []string) error {
|
||||
p, err := pm.config.Store.GetV2Plugin(name)
|
||||
|
|
|
@ -163,6 +163,19 @@ func (pm *Manager) StateChanged(id string, e libcontainerd.StateInfo) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func handleLoadError(err error, id string) {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
logger := logrus.WithError(err).WithField("id", id)
|
||||
if os.IsNotExist(errors.Cause(err)) {
|
||||
// Likely some error while removing on an older version of docker
|
||||
logger.Warn("missing plugin config, skipping: this may be caused due to a failed remove and requires manual cleanup.")
|
||||
return
|
||||
}
|
||||
logger.Error("error loading plugin, skipping")
|
||||
}
|
||||
|
||||
func (pm *Manager) reload() error { // todo: restore
|
||||
dir, err := ioutil.ReadDir(pm.config.Root)
|
||||
if err != nil {
|
||||
|
@ -173,9 +186,17 @@ func (pm *Manager) reload() error { // todo: restore
|
|||
if validFullID.MatchString(v.Name()) {
|
||||
p, err := pm.loadPlugin(v.Name())
|
||||
if err != nil {
|
||||
return err
|
||||
handleLoadError(err, v.Name())
|
||||
continue
|
||||
}
|
||||
plugins[p.GetID()] = p
|
||||
} else {
|
||||
if validFullID.MatchString(strings.TrimSuffix(v.Name(), "-removing")) {
|
||||
// There was likely some error while removing this plugin, let's try to remove again here
|
||||
if err := system.EnsureRemoveAll(v.Name()); err != nil {
|
||||
logrus.WithError(err).WithField("id", v.Name()).Warn("error while attempting to clean up previously removed plugin")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -204,7 +204,7 @@ func (pm *Manager) upgradePlugin(p *v2.Plugin, configDigest digest.Digest, blobs
|
|||
// Make sure nothing is mounted
|
||||
// This could happen if the plugin was disabled with `-f` with active mounts.
|
||||
// If there is anything in `orig` is still mounted, this should error out.
|
||||
if err := recursiveUnmount(orig); err != nil {
|
||||
if err := mount.RecursiveUnmount(orig); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue