mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Merge pull request #17191 from mrjana/restart
Fix docker startup failure due to dangling endpoints
This commit is contained in:
commit
566964bf73
13 changed files with 196 additions and 67 deletions
|
@ -910,6 +910,8 @@ func (container *Container) ConnectToNetwork(idOrName string) error {
|
|||
}
|
||||
|
||||
func (container *Container) connectToNetwork(idOrName string, updateSettings bool) error {
|
||||
var err error
|
||||
|
||||
if container.hostConfig.NetworkMode.IsContainer() {
|
||||
return runconfig.ErrConflictSharedNetwork
|
||||
}
|
||||
|
@ -934,23 +936,32 @@ func (container *Container) connectToNetwork(idOrName string, updateSettings boo
|
|||
}
|
||||
|
||||
ep, err := container.getEndpointInNetwork(n)
|
||||
if err != nil {
|
||||
if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok {
|
||||
return err
|
||||
}
|
||||
|
||||
createOptions, err := container.buildCreateEndpointOptions()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
endpointName := strings.TrimPrefix(container.Name, "/")
|
||||
ep, err = n.CreateEndpoint(endpointName, createOptions...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err == nil {
|
||||
return fmt.Errorf("container already connected to network %s", idOrName)
|
||||
}
|
||||
|
||||
if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok {
|
||||
return err
|
||||
}
|
||||
|
||||
createOptions, err := container.buildCreateEndpointOptions()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
endpointName := strings.TrimPrefix(container.Name, "/")
|
||||
ep, err = n.CreateEndpoint(endpointName, createOptions...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
if e := ep.Delete(); e != nil {
|
||||
logrus.Warnf("Could not rollback container connection to network %s", idOrName)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if err := container.updateEndpointNetworkSettings(n, ep); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -972,9 +983,9 @@ func (container *Container) connectToNetwork(idOrName string, updateSettings boo
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
container.updateSandboxNetworkSettings(sb)
|
||||
container.updateSandboxNetworkSettings(sb)
|
||||
}
|
||||
|
||||
if err := ep.Join(sb); err != nil {
|
||||
return err
|
||||
|
@ -1120,14 +1131,6 @@ func (container *Container) releaseNetwork() {
|
|||
return
|
||||
}
|
||||
|
||||
for _, ns := range networks {
|
||||
n, err := container.daemon.FindNetwork(ns)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
container.disconnectFromNetwork(n, false)
|
||||
}
|
||||
|
||||
if err := sb.Delete(); err != nil {
|
||||
logrus.Errorf("Error deleting sandbox id %s for container %s: %v", sid, container.ID, err)
|
||||
}
|
||||
|
@ -1139,10 +1142,10 @@ func (container *Container) DisconnectFromNetwork(n libnetwork.Network) error {
|
|||
return derr.ErrorCodeNotRunning.WithArgs(container.ID)
|
||||
}
|
||||
|
||||
return container.disconnectFromNetwork(n, true)
|
||||
return container.disconnectFromNetwork(n)
|
||||
}
|
||||
|
||||
func (container *Container) disconnectFromNetwork(n libnetwork.Network, updateSettings bool) error {
|
||||
func (container *Container) disconnectFromNetwork(n libnetwork.Network) error {
|
||||
var (
|
||||
ep libnetwork.Endpoint
|
||||
sbox libnetwork.Sandbox
|
||||
|
@ -1172,20 +1175,19 @@ func (container *Container) disconnectFromNetwork(n libnetwork.Network, updateSe
|
|||
return fmt.Errorf("endpoint delete failed for container %s on network %s: %v", container.ID, n.Name(), err)
|
||||
}
|
||||
|
||||
if updateSettings {
|
||||
networks := container.NetworkSettings.Networks
|
||||
for i, s := range networks {
|
||||
sn, err := container.daemon.FindNetwork(s)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if sn.Name() == n.Name() {
|
||||
networks = append(networks[:i], networks[i+1:]...)
|
||||
container.NetworkSettings.Networks = networks
|
||||
break
|
||||
}
|
||||
networks := container.NetworkSettings.Networks
|
||||
for i, s := range networks {
|
||||
sn, err := container.daemon.FindNetwork(s)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if sn.Name() == n.Name() {
|
||||
networks = append(networks[:i], networks[i+1:]...)
|
||||
container.NetworkSettings.Networks = networks
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ clone git github.com/vdemeester/shakers 3c10293ce22b900c27acad7b28656196fcc2f73b
|
|||
clone git golang.org/x/net 3cffabab72adf04f8e3b01c5baf775361837b5fe https://github.com/golang/net.git
|
||||
|
||||
#get libnetwork packages
|
||||
clone git github.com/docker/libnetwork 2934f6bf585fa24c86048cc85f7506a5bb626bf5
|
||||
clone git github.com/docker/libnetwork fc6cbea49cd8197c0a8d22b9e8f24f37d9e7b1b8
|
||||
clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
|
||||
clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
|
||||
clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
.PHONY: all all-local build build-local check check-code check-format run-tests check-local integration-tests install-deps coveralls circle-ci start-services clean
|
||||
SHELL=/bin/bash
|
||||
build_image=libnetwork-build
|
||||
build_image=libnetworkbuild
|
||||
dockerargs = --privileged -v $(shell pwd):/go/src/github.com/docker/libnetwork -w /go/src/github.com/docker/libnetwork
|
||||
container_env = -e "INSIDECONTAINER=-incontainer=true"
|
||||
docker = docker run --rm -it ${dockerargs} ${container_env} ${build_image}
|
||||
|
|
|
@ -192,6 +192,7 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
|
|||
}
|
||||
|
||||
c.sandboxCleanup()
|
||||
c.cleanupLocalEndpoints()
|
||||
|
||||
if err := c.startExternalKeyListener(); err != nil {
|
||||
return nil, err
|
||||
|
@ -356,7 +357,7 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti
|
|||
}
|
||||
}()
|
||||
|
||||
if err := c.addNetwork(network); err != nil {
|
||||
if err = c.addNetwork(network); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
|
|
|
@ -188,7 +188,7 @@ func ParseKey(key string) ([]string, error) {
|
|||
}
|
||||
|
||||
// newClient used to connect to KV Store
|
||||
func newClient(scope string, kv string, addrs string, config *store.Config, cached bool) (DataStore, error) {
|
||||
func newClient(scope string, kv string, addr string, config *store.Config, cached bool) (DataStore, error) {
|
||||
if cached && scope != LocalScope {
|
||||
return nil, fmt.Errorf("caching supported only for scope %s", LocalScope)
|
||||
}
|
||||
|
@ -196,7 +196,10 @@ func newClient(scope string, kv string, addrs string, config *store.Config, cach
|
|||
if config == nil {
|
||||
config = &store.Config{}
|
||||
}
|
||||
store, err := libkv.NewStore(store.Backend(kv), []string{addrs}, config)
|
||||
|
||||
addrs := strings.Split(addr, ",")
|
||||
|
||||
store, err := libkv.NewStore(store.Backend(kv), addrs, config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -262,6 +265,13 @@ func (ds *datastore) Watch(kvObject KVObject, stopCh <-chan struct{}) (<-chan KV
|
|||
close(sCh)
|
||||
return
|
||||
case kvPair := <-kvpCh:
|
||||
// If the backend KV store gets reset libkv's go routine
|
||||
// for the watch can exit resulting in a nil value in
|
||||
// channel.
|
||||
if kvPair == nil {
|
||||
close(sCh)
|
||||
return
|
||||
}
|
||||
dstO := ctor.New()
|
||||
|
||||
if err := dstO.SetValue(kvPair.Value); err != nil {
|
||||
|
|
|
@ -989,7 +989,7 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
|
|||
d.Unlock()
|
||||
|
||||
if !ok {
|
||||
return types.NotFoundErrorf("network %s does not exist", nid)
|
||||
return types.InternalMaskableErrorf("network %s does not exist", nid)
|
||||
}
|
||||
if n == nil {
|
||||
return driverapi.ErrNoNetwork(nid)
|
||||
|
@ -1145,7 +1145,7 @@ func (d *driver) Leave(nid, eid string) error {
|
|||
|
||||
network, err := d.getNetwork(nid)
|
||||
if err != nil {
|
||||
return err
|
||||
return types.InternalMaskableErrorf("%s", err)
|
||||
}
|
||||
|
||||
endpoint, err := network.getEndpoint(eid)
|
||||
|
|
|
@ -179,6 +179,7 @@ func (n *network) destroySandbox() {
|
|||
}
|
||||
}
|
||||
sbox.Destroy()
|
||||
n.setSandbox(nil)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -193,7 +194,7 @@ func (n *network) initSubnetSandbox(s *subnet) error {
|
|||
if err := sbox.AddInterface(brName, "br",
|
||||
sbox.InterfaceOptions().Address(s.gwIP),
|
||||
sbox.InterfaceOptions().Bridge(true)); err != nil {
|
||||
return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.IP.String(), err)
|
||||
return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
|
||||
}
|
||||
|
||||
vxlanName, err := createVxlan(n.vxlanID(s))
|
||||
|
@ -203,7 +204,7 @@ func (n *network) initSubnetSandbox(s *subnet) error {
|
|||
|
||||
if err := sbox.AddInterface(vxlanName, "vxlan",
|
||||
sbox.InterfaceOptions().Master(brName)); err != nil {
|
||||
return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.IP.String(), err)
|
||||
return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
|
||||
}
|
||||
|
||||
n.Lock()
|
||||
|
|
|
@ -425,28 +425,32 @@ func (ep *endpoint) sbLeave(sbox Sandbox, options ...EndpointOption) error {
|
|||
|
||||
ep.processOptions(options...)
|
||||
|
||||
ep.Lock()
|
||||
ep.sandboxID = ""
|
||||
ep.network = n
|
||||
ep.Unlock()
|
||||
|
||||
if err := n.getController().updateToStore(ep); err != nil {
|
||||
ep.Lock()
|
||||
ep.sandboxID = sid
|
||||
ep.Unlock()
|
||||
return err
|
||||
}
|
||||
|
||||
d, err := n.driver()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to leave endpoint: %v", err)
|
||||
}
|
||||
|
||||
ep.Lock()
|
||||
ep.sandboxID = ""
|
||||
ep.network = n
|
||||
ep.Unlock()
|
||||
|
||||
if err := d.Leave(n.id, ep.id); err != nil {
|
||||
return err
|
||||
if _, ok := err.(types.MaskableError); !ok {
|
||||
log.Warnf("driver error disconnecting container %s : %v", ep.name, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := sb.clearNetworkResources(ep); err != nil {
|
||||
log.Warnf("Could not cleanup network resources on container %s disconnect: %v", ep.name, err)
|
||||
}
|
||||
|
||||
// Update the store about the sandbox detach only after we
|
||||
// have completed sb.clearNetworkresources above to avoid
|
||||
// spurious logs when cleaning up the sandbox when the daemon
|
||||
// ungracefully exits and restarts before completing sandbox
|
||||
// detach but after store has been updated.
|
||||
if err := n.getController().updateToStore(ep); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -532,7 +536,10 @@ func (ep *endpoint) deleteEndpoint() error {
|
|||
if _, ok := err.(types.ForbiddenError); ok {
|
||||
return err
|
||||
}
|
||||
log.Warnf("driver error deleting endpoint %s : %v", name, err)
|
||||
|
||||
if _, ok := err.(types.MaskableError); !ok {
|
||||
log.Warnf("driver error deleting endpoint %s : %v", name, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -704,3 +711,25 @@ func (ep *endpoint) releaseAddress() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *controller) cleanupLocalEndpoints() {
|
||||
nl, err := c.getNetworksForScope(datastore.LocalScope)
|
||||
if err != nil {
|
||||
log.Warnf("Could not get list of networks during endpoint cleanup: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, n := range nl {
|
||||
epl, err := n.getEndpointsFromStore()
|
||||
if err != nil {
|
||||
log.Warnf("Could not get list of endpoints in network %s during endpoint cleanup: %v", n.name, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, ep := range epl {
|
||||
if err := ep.Delete(); err != nil {
|
||||
log.Warnf("Could not delete local endpoint %s during endpoint cleanup: %v", ep.name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"net"
|
||||
|
||||
"github.com/docker/libnetwork/netutils"
|
||||
"github.com/docker/libnetwork/osl"
|
||||
"github.com/docker/libnetwork/resolvconf"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
@ -21,6 +22,8 @@ func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) {
|
|||
err error
|
||||
)
|
||||
|
||||
defer osl.InitOSContext()()
|
||||
|
||||
link, _ := netlink.LinkByName(name)
|
||||
if link != nil {
|
||||
v4addr, err := netlink.AddrList(link, netlink.FAMILY_V4)
|
||||
|
|
|
@ -161,8 +161,8 @@ func GenerateIfaceName(prefix string, len int) (string, error) {
|
|||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if _, err := net.InterfaceByName(name); err != nil {
|
||||
if strings.Contains(err.Error(), "no such") {
|
||||
if _, err := netlink.LinkByName(name); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return name, nil
|
||||
}
|
||||
return "", err
|
||||
|
|
|
@ -66,6 +66,7 @@ type sandbox struct {
|
|||
joinLeaveDone chan struct{}
|
||||
dbIndex uint64
|
||||
dbExists bool
|
||||
inDelete bool
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
|
@ -146,6 +147,22 @@ func (sb *sandbox) Statistics() (map[string]*types.InterfaceStatistics, error) {
|
|||
}
|
||||
|
||||
func (sb *sandbox) Delete() error {
|
||||
sb.Lock()
|
||||
if sb.inDelete {
|
||||
sb.Unlock()
|
||||
return types.ForbiddenErrorf("another sandbox delete in progress")
|
||||
}
|
||||
// Set the inDelete flag. This will ensure that we don't
|
||||
// update the store until we have completed all the endpoint
|
||||
// leaves and deletes. And when endpoint leaves and deletes
|
||||
// are completed then we can finally delete the sandbox object
|
||||
// altogether from the data store. If the daemon exits
|
||||
// ungracefully in the middle of a sandbox delete this way we
|
||||
// will have all the references to the endpoints in the
|
||||
// sandbox so that we can clean them up when we restart
|
||||
sb.inDelete = true
|
||||
sb.Unlock()
|
||||
|
||||
c := sb.controller
|
||||
|
||||
// Detach from all endpoints
|
||||
|
@ -355,6 +372,10 @@ func releaseOSSboxResources(osSbox osl.Sandbox, ep *endpoint) {
|
|||
joinInfo := ep.joinInfo
|
||||
ep.Unlock()
|
||||
|
||||
if joinInfo == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Remove non-interface routes.
|
||||
for _, r := range joinInfo.StaticRoutes {
|
||||
if err := osSbox.RemoveStaticRoute(r); err != nil {
|
||||
|
@ -386,6 +407,7 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
|
|||
sb.Unlock()
|
||||
return nil
|
||||
}
|
||||
inDelete := sb.inDelete
|
||||
sb.Unlock()
|
||||
|
||||
ep.Lock()
|
||||
|
@ -425,7 +447,16 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
|
|||
}
|
||||
}
|
||||
}
|
||||
return sb.storeUpdate()
|
||||
|
||||
// Only update the store if we did not come here as part of
|
||||
// sandbox delete. If we came here as part of delete then do
|
||||
// not bother updating the store. The sandbox object will be
|
||||
// deleted anyway
|
||||
if !inDelete {
|
||||
return sb.storeUpdate()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
|
||||
|
@ -437,6 +468,7 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
|
|||
|
||||
sb.Lock()
|
||||
osSbox := sb.osSbox
|
||||
inDelete := sb.inDelete
|
||||
sb.Unlock()
|
||||
if osSbox != nil {
|
||||
releaseOSSboxResources(osSbox, ep)
|
||||
|
@ -480,7 +512,15 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
|
|||
sb.updateGateway(gwepAfter)
|
||||
}
|
||||
|
||||
return sb.storeUpdate()
|
||||
// Only update the store if we did not come here as part of
|
||||
// sandbox delete. If we came here as part of delete then do
|
||||
// not bother updating the store. The sandbox object will be
|
||||
// deleted anyway
|
||||
if !inDelete {
|
||||
return sb.storeUpdate()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
const (
|
||||
|
|
|
@ -123,6 +123,8 @@ func (sb *sandbox) storeUpdate() error {
|
|||
ID: sb.id,
|
||||
}
|
||||
|
||||
retry:
|
||||
sbs.Eps = nil
|
||||
for _, ep := range sb.getConnectedEndpoints() {
|
||||
eps := epState{
|
||||
Nid: ep.getNetwork().ID(),
|
||||
|
@ -132,7 +134,16 @@ func (sb *sandbox) storeUpdate() error {
|
|||
sbs.Eps = append(sbs.Eps, eps)
|
||||
}
|
||||
|
||||
return sb.controller.updateToStore(sbs)
|
||||
err := sb.controller.updateToStore(sbs)
|
||||
if err == datastore.ErrKeyModified {
|
||||
// When we get ErrKeyModified it is sufficient to just
|
||||
// go back and retry. No need to get the object from
|
||||
// the store because we always regenerate the store
|
||||
// state from in memory sandbox state
|
||||
goto retry
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (sb *sandbox) storeDelete() error {
|
||||
|
|
32
vendor/src/github.com/docker/libnetwork/store.go
vendored
32
vendor/src/github.com/docker/libnetwork/store.go
vendored
|
@ -82,6 +82,38 @@ func (c *controller) getNetworkFromStore(nid string) (*network, error) {
|
|||
return nil, fmt.Errorf("network %s not found", nid)
|
||||
}
|
||||
|
||||
func (c *controller) getNetworksForScope(scope string) ([]*network, error) {
|
||||
var nl []*network
|
||||
|
||||
store := c.getStore(scope)
|
||||
if store == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
kvol, err := store.List(datastore.Key(datastore.NetworkKeyPrefix),
|
||||
&network{ctrlr: c})
|
||||
if err != nil && err != datastore.ErrKeyNotFound {
|
||||
return nil, fmt.Errorf("failed to get networks for scope %s: %v",
|
||||
scope, err)
|
||||
}
|
||||
|
||||
for _, kvo := range kvol {
|
||||
n := kvo.(*network)
|
||||
n.ctrlr = c
|
||||
|
||||
ec := &endpointCnt{n: n}
|
||||
err = store.GetObject(datastore.Key(ec.Key()...), ec)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not find endpoint count key %s for network %s while listing: %v", datastore.Key(ec.Key()...), n.Name(), err)
|
||||
}
|
||||
|
||||
n.epCnt = ec
|
||||
nl = append(nl, n)
|
||||
}
|
||||
|
||||
return nl, nil
|
||||
}
|
||||
|
||||
func (c *controller) getNetworksFromStore() ([]*network, error) {
|
||||
var nl []*network
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue