diff --git a/daemon/cluster/cluster.go b/daemon/cluster/cluster.go
index 8c70526c5e..7400346fbf 100644
--- a/daemon/cluster/cluster.go
+++ b/daemon/cluster/cluster.go
@@ -93,7 +93,7 @@ func New(config Config) (*Cluster, error) {
 		reconnectDelay: initialReconnectDelay,
 	}
 
-	dt, err := ioutil.ReadFile(filepath.Join(root, stateFile))
+	st, err := c.loadState()
 	if err != nil {
 		if os.IsNotExist(err) {
 			return c, nil
@@ -101,11 +101,6 @@ func New(config Config) (*Cluster, error) {
 		return nil, err
 	}
 
-	var st state
-	if err := json.Unmarshal(dt, &st); err != nil {
-		return nil, err
-	}
-
 	n, ctx, err := c.startNewNode(false, st.ListenAddr, "", "", "", false)
 	if err != nil {
 		return nil, err
@@ -124,6 +119,32 @@ func New(config Config) (*Cluster, error) {
 	return c, nil
 }
 
+// loadState reads and decodes the persisted state file found under c.root.
+// If the state file is present but the node certificate is missing, there is
+// nothing to restore from: the state is cleared and the os.Stat error is
+// returned, so a caller testing os.IsNotExist sees "no state". A failure to
+// clear the state is returned instead of being dropped.
+func (c *Cluster) loadState() (*state, error) {
+	dt, err := ioutil.ReadFile(filepath.Join(c.root, stateFile))
+	if err != nil {
+		return nil, err
+	}
+	// missing certificate means no actual state to restore from
+	if _, err := os.Stat(filepath.Join(c.root, "certificates", "swarm-node.crt")); err != nil {
+		if os.IsNotExist(err) {
+			// Surface a failed cleanup rather than silently leaving stale state behind.
+			if cerr := c.clearState(); cerr != nil {
+				return nil, cerr
+			}
+		}
+		return nil, err
+	}
+	var st state
+	if err := json.Unmarshal(dt, &st); err != nil {
+		return nil, err
+	}
+	return &st, nil
+}
+
 func (c *Cluster) saveState() error {
 	dt, err := json.Marshal(state{ListenAddr: c.listenAddr})
 	if err != nil {
@@ -410,6 +431,7 @@ func (c *Cluster) Leave(force bool) error {
 }
 
 func (c *Cluster) clearState() error {
+	// TODO: backup this data instead of removing?
 	if err := os.RemoveAll(c.root); err != nil {
 		return err
 	}
diff --git a/integration-cli/docker_api_swarm_test.go b/integration-cli/docker_api_swarm_test.go
index 9f086bd930..7b99a33348 100644
--- a/integration-cli/docker_api_swarm_test.go
+++ b/integration-cli/docker_api_swarm_test.go
@@ -578,6 +578,33 @@ func (s *DockerSwarmSuite) TestApiSwarmLeaveOnPendingJoin(c *check.C) {
 	c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
 }
 
+// TestApiSwarmRestoreOnPendingJoin starts a join against an unreachable host,
+// waits for the node to report the pending state, restarts the daemon and
+// asserts that the node then reports the inactive state. See #23705.
+func (s *DockerSwarmSuite) TestApiSwarmRestoreOnPendingJoin(c *check.C) {
+	d := s.AddDaemon(c, false, false)
+	go d.Join("nosuchhost:1234", "", "", false) // will block on pending state
+
+	for i := 0; ; i++ {
+		info, err := d.info()
+		c.Assert(err, checker.IsNil)
+		if info.LocalNodeState == swarm.LocalNodeStatePending {
+			break
+		}
+		if i > 10 {
+			c.Fatalf("node did not go to pending state: %v", info.LocalNodeState)
+		}
+		time.Sleep(100 * time.Millisecond)
+	}
+
+	c.Assert(d.Stop(), checker.IsNil)
+	c.Assert(d.Start(), checker.IsNil)
+
+	info, err := d.info()
+	c.Assert(err, checker.IsNil)
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
+}
+
 func (s *DockerSwarmSuite) TestApiSwarmManagerRestore(c *check.C) {
 	d1 := s.AddDaemon(c, true, true)
 