1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Merge pull request #17191 from mrjana/restart

Fix docker startup failure due to dangling endpoints
This commit is contained in:
Tibor Vass 2015-10-20 10:32:33 -04:00
commit 566964bf73
13 changed files with 196 additions and 67 deletions

View file

@ -910,6 +910,8 @@ func (container *Container) ConnectToNetwork(idOrName string) error {
}
func (container *Container) connectToNetwork(idOrName string, updateSettings bool) error {
var err error
if container.hostConfig.NetworkMode.IsContainer() {
return runconfig.ErrConflictSharedNetwork
}
@ -934,23 +936,32 @@ func (container *Container) connectToNetwork(idOrName string, updateSettings boo
}
ep, err := container.getEndpointInNetwork(n)
if err != nil {
if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok {
return err
}
createOptions, err := container.buildCreateEndpointOptions()
if err != nil {
return err
}
endpointName := strings.TrimPrefix(container.Name, "/")
ep, err = n.CreateEndpoint(endpointName, createOptions...)
if err != nil {
return err
}
if err == nil {
return fmt.Errorf("container already connected to network %s", idOrName)
}
if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok {
return err
}
createOptions, err := container.buildCreateEndpointOptions()
if err != nil {
return err
}
endpointName := strings.TrimPrefix(container.Name, "/")
ep, err = n.CreateEndpoint(endpointName, createOptions...)
if err != nil {
return err
}
defer func() {
if err != nil {
if e := ep.Delete(); e != nil {
logrus.Warnf("Could not rollback container connection to network %s", idOrName)
}
}
}()
if err := container.updateEndpointNetworkSettings(n, ep); err != nil {
return err
}
@ -972,9 +983,9 @@ func (container *Container) connectToNetwork(idOrName string, updateSettings boo
if err != nil {
return err
}
}
container.updateSandboxNetworkSettings(sb)
container.updateSandboxNetworkSettings(sb)
}
if err := ep.Join(sb); err != nil {
return err
@ -1120,14 +1131,6 @@ func (container *Container) releaseNetwork() {
return
}
for _, ns := range networks {
n, err := container.daemon.FindNetwork(ns)
if err != nil {
continue
}
container.disconnectFromNetwork(n, false)
}
if err := sb.Delete(); err != nil {
logrus.Errorf("Error deleting sandbox id %s for container %s: %v", sid, container.ID, err)
}
@ -1139,10 +1142,10 @@ func (container *Container) DisconnectFromNetwork(n libnetwork.Network) error {
return derr.ErrorCodeNotRunning.WithArgs(container.ID)
}
return container.disconnectFromNetwork(n, true)
return container.disconnectFromNetwork(n)
}
func (container *Container) disconnectFromNetwork(n libnetwork.Network, updateSettings bool) error {
func (container *Container) disconnectFromNetwork(n libnetwork.Network) error {
var (
ep libnetwork.Endpoint
sbox libnetwork.Sandbox
@ -1172,20 +1175,19 @@ func (container *Container) disconnectFromNetwork(n libnetwork.Network, updateSe
return fmt.Errorf("endpoint delete failed for container %s on network %s: %v", container.ID, n.Name(), err)
}
if updateSettings {
networks := container.NetworkSettings.Networks
for i, s := range networks {
sn, err := container.daemon.FindNetwork(s)
if err != nil {
continue
}
if sn.Name() == n.Name() {
networks = append(networks[:i], networks[i+1:]...)
container.NetworkSettings.Networks = networks
break
}
networks := container.NetworkSettings.Networks
for i, s := range networks {
sn, err := container.daemon.FindNetwork(s)
if err != nil {
continue
}
if sn.Name() == n.Name() {
networks = append(networks[:i], networks[i+1:]...)
container.NetworkSettings.Networks = networks
break
}
}
return nil
}

View file

@ -21,7 +21,7 @@ clone git github.com/vdemeester/shakers 3c10293ce22b900c27acad7b28656196fcc2f73b
clone git golang.org/x/net 3cffabab72adf04f8e3b01c5baf775361837b5fe https://github.com/golang/net.git
#get libnetwork packages
clone git github.com/docker/libnetwork 2934f6bf585fa24c86048cc85f7506a5bb626bf5
clone git github.com/docker/libnetwork fc6cbea49cd8197c0a8d22b9e8f24f37d9e7b1b8
clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4

View file

@ -1,6 +1,6 @@
.PHONY: all all-local build build-local check check-code check-format run-tests check-local integration-tests install-deps coveralls circle-ci start-services clean
SHELL=/bin/bash
build_image=libnetwork-build
build_image=libnetworkbuild
dockerargs = --privileged -v $(shell pwd):/go/src/github.com/docker/libnetwork -w /go/src/github.com/docker/libnetwork
container_env = -e "INSIDECONTAINER=-incontainer=true"
docker = docker run --rm -it ${dockerargs} ${container_env} ${build_image}

View file

@ -192,6 +192,7 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
}
c.sandboxCleanup()
c.cleanupLocalEndpoints()
if err := c.startExternalKeyListener(); err != nil {
return nil, err
@ -356,7 +357,7 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti
}
}()
if err := c.addNetwork(network); err != nil {
if err = c.addNetwork(network); err != nil {
return nil, err
}
defer func() {

View file

@ -188,7 +188,7 @@ func ParseKey(key string) ([]string, error) {
}
// newClient used to connect to KV Store
func newClient(scope string, kv string, addrs string, config *store.Config, cached bool) (DataStore, error) {
func newClient(scope string, kv string, addr string, config *store.Config, cached bool) (DataStore, error) {
if cached && scope != LocalScope {
return nil, fmt.Errorf("caching supported only for scope %s", LocalScope)
}
@ -196,7 +196,10 @@ func newClient(scope string, kv string, addrs string, config *store.Config, cach
if config == nil {
config = &store.Config{}
}
store, err := libkv.NewStore(store.Backend(kv), []string{addrs}, config)
addrs := strings.Split(addr, ",")
store, err := libkv.NewStore(store.Backend(kv), addrs, config)
if err != nil {
return nil, err
}
@ -262,6 +265,13 @@ func (ds *datastore) Watch(kvObject KVObject, stopCh <-chan struct{}) (<-chan KV
close(sCh)
return
case kvPair := <-kvpCh:
// If the backend KV store gets reset, libkv's goroutine
// for the watch can exit, resulting in a nil value on the
// channel.
if kvPair == nil {
close(sCh)
return
}
dstO := ctor.New()
if err := dstO.SetValue(kvPair.Value); err != nil {

View file

@ -989,7 +989,7 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
d.Unlock()
if !ok {
return types.NotFoundErrorf("network %s does not exist", nid)
return types.InternalMaskableErrorf("network %s does not exist", nid)
}
if n == nil {
return driverapi.ErrNoNetwork(nid)
@ -1145,7 +1145,7 @@ func (d *driver) Leave(nid, eid string) error {
network, err := d.getNetwork(nid)
if err != nil {
return err
return types.InternalMaskableErrorf("%s", err)
}
endpoint, err := network.getEndpoint(eid)

View file

@ -179,6 +179,7 @@ func (n *network) destroySandbox() {
}
}
sbox.Destroy()
n.setSandbox(nil)
}
}
@ -193,7 +194,7 @@ func (n *network) initSubnetSandbox(s *subnet) error {
if err := sbox.AddInterface(brName, "br",
sbox.InterfaceOptions().Address(s.gwIP),
sbox.InterfaceOptions().Bridge(true)); err != nil {
return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.IP.String(), err)
return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
}
vxlanName, err := createVxlan(n.vxlanID(s))
@ -203,7 +204,7 @@ func (n *network) initSubnetSandbox(s *subnet) error {
if err := sbox.AddInterface(vxlanName, "vxlan",
sbox.InterfaceOptions().Master(brName)); err != nil {
return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.IP.String(), err)
return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
}
n.Lock()

View file

@ -425,28 +425,32 @@ func (ep *endpoint) sbLeave(sbox Sandbox, options ...EndpointOption) error {
ep.processOptions(options...)
ep.Lock()
ep.sandboxID = ""
ep.network = n
ep.Unlock()
if err := n.getController().updateToStore(ep); err != nil {
ep.Lock()
ep.sandboxID = sid
ep.Unlock()
return err
}
d, err := n.driver()
if err != nil {
return fmt.Errorf("failed to leave endpoint: %v", err)
}
ep.Lock()
ep.sandboxID = ""
ep.network = n
ep.Unlock()
if err := d.Leave(n.id, ep.id); err != nil {
return err
if _, ok := err.(types.MaskableError); !ok {
log.Warnf("driver error disconnecting container %s : %v", ep.name, err)
}
}
if err := sb.clearNetworkResources(ep); err != nil {
log.Warnf("Could not cleanup network resources on container %s disconnect: %v", ep.name, err)
}
// Update the store about the sandbox detach only after we
// have completed sb.clearNetworkResources above to avoid
// spurious logs when cleaning up the sandbox when the daemon
// ungracefully exits and restarts before completing sandbox
// detach but after store has been updated.
if err := n.getController().updateToStore(ep); err != nil {
return err
}
@ -532,7 +536,10 @@ func (ep *endpoint) deleteEndpoint() error {
if _, ok := err.(types.ForbiddenError); ok {
return err
}
log.Warnf("driver error deleting endpoint %s : %v", name, err)
if _, ok := err.(types.MaskableError); !ok {
log.Warnf("driver error deleting endpoint %s : %v", name, err)
}
}
return nil
@ -704,3 +711,25 @@ func (ep *endpoint) releaseAddress() {
}
}
}
// cleanupLocalEndpoints deletes every endpoint that the store still records
// for networks of local scope. It is best-effort: any failure to list
// networks, list endpoints, or delete an endpoint is logged as a warning and
// does not propagate to the caller.
func (c *controller) cleanupLocalEndpoints() {
	networks, err := c.getNetworksForScope(datastore.LocalScope)
	if err != nil {
		log.Warnf("Could not get list of networks during endpoint cleanup: %v", err)
		return
	}

	for _, nw := range networks {
		endpoints, listErr := nw.getEndpointsFromStore()
		if listErr != nil {
			// A failure on one network must not prevent cleanup of the others.
			log.Warnf("Could not get list of endpoints in network %s during endpoint cleanup: %v", nw.name, listErr)
			continue
		}

		for _, staleEp := range endpoints {
			delErr := staleEp.Delete()
			if delErr == nil {
				continue
			}
			log.Warnf("Could not delete local endpoint %s during endpoint cleanup: %v", staleEp.name, delErr)
		}
	}
}

View file

@ -6,6 +6,7 @@ import (
"net"
"github.com/docker/libnetwork/netutils"
"github.com/docker/libnetwork/osl"
"github.com/docker/libnetwork/resolvconf"
"github.com/vishvananda/netlink"
)
@ -21,6 +22,8 @@ func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) {
err error
)
defer osl.InitOSContext()()
link, _ := netlink.LinkByName(name)
if link != nil {
v4addr, err := netlink.AddrList(link, netlink.FAMILY_V4)

View file

@ -161,8 +161,8 @@ func GenerateIfaceName(prefix string, len int) (string, error) {
if err != nil {
continue
}
if _, err := net.InterfaceByName(name); err != nil {
if strings.Contains(err.Error(), "no such") {
if _, err := netlink.LinkByName(name); err != nil {
if strings.Contains(err.Error(), "not found") {
return name, nil
}
return "", err

View file

@ -66,6 +66,7 @@ type sandbox struct {
joinLeaveDone chan struct{}
dbIndex uint64
dbExists bool
inDelete bool
sync.Mutex
}
@ -146,6 +147,22 @@ func (sb *sandbox) Statistics() (map[string]*types.InterfaceStatistics, error) {
}
func (sb *sandbox) Delete() error {
sb.Lock()
if sb.inDelete {
sb.Unlock()
return types.ForbiddenErrorf("another sandbox delete in progress")
}
// Set the inDelete flag. This will ensure that we don't
// update the store until we have completed all the endpoint
// leaves and deletes. And when endpoint leaves and deletes
// are completed then we can finally delete the sandbox object
// altogether from the data store. If the daemon exits
// ungracefully in the middle of a sandbox delete this way we
// will have all the references to the endpoints in the
// sandbox so that we can clean them up when we restart
sb.inDelete = true
sb.Unlock()
c := sb.controller
// Detach from all endpoints
@ -355,6 +372,10 @@ func releaseOSSboxResources(osSbox osl.Sandbox, ep *endpoint) {
joinInfo := ep.joinInfo
ep.Unlock()
if joinInfo == nil {
return
}
// Remove non-interface routes.
for _, r := range joinInfo.StaticRoutes {
if err := osSbox.RemoveStaticRoute(r); err != nil {
@ -386,6 +407,7 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
sb.Unlock()
return nil
}
inDelete := sb.inDelete
sb.Unlock()
ep.Lock()
@ -425,7 +447,16 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
}
}
}
return sb.storeUpdate()
// Only update the store if we did not come here as part of
// sandbox delete. If we came here as part of delete then do
// not bother updating the store. The sandbox object will be
// deleted anyway
if !inDelete {
return sb.storeUpdate()
}
return nil
}
func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
@ -437,6 +468,7 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
sb.Lock()
osSbox := sb.osSbox
inDelete := sb.inDelete
sb.Unlock()
if osSbox != nil {
releaseOSSboxResources(osSbox, ep)
@ -480,7 +512,15 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
sb.updateGateway(gwepAfter)
}
return sb.storeUpdate()
// Only update the store if we did not come here as part of
// sandbox delete. If we came here as part of delete then do
// not bother updating the store. The sandbox object will be
// deleted anyway
if !inDelete {
return sb.storeUpdate()
}
return nil
}
const (

View file

@ -123,6 +123,8 @@ func (sb *sandbox) storeUpdate() error {
ID: sb.id,
}
retry:
sbs.Eps = nil
for _, ep := range sb.getConnectedEndpoints() {
eps := epState{
Nid: ep.getNetwork().ID(),
@ -132,7 +134,16 @@ func (sb *sandbox) storeUpdate() error {
sbs.Eps = append(sbs.Eps, eps)
}
return sb.controller.updateToStore(sbs)
err := sb.controller.updateToStore(sbs)
if err == datastore.ErrKeyModified {
// When we get ErrKeyModified it is sufficient to just
// go back and retry. No need to get the object from
// the store because we always regenerate the store
// state from in memory sandbox state
goto retry
}
return err
}
func (sb *sandbox) storeDelete() error {

View file

@ -82,6 +82,38 @@ func (c *controller) getNetworkFromStore(nid string) (*network, error) {
return nil, fmt.Errorf("network %s not found", nid)
}
// getNetworksForScope lists the networks recorded in the store for the given
// scope and attaches to each one its endpoint count object, read from the
// same store.
//
// It returns (nil, nil) when no store is available for the scope. A list
// failure other than datastore.ErrKeyNotFound is returned as an error, as is
// a failure to read the endpoint count of any listed network; in the latter
// case no partial result is returned.
func (c *controller) getNetworksForScope(scope string) ([]*network, error) {
	scopeStore := c.getStore(scope)
	if scopeStore == nil {
		return nil, nil
	}

	objs, err := scopeStore.List(datastore.Key(datastore.NetworkKeyPrefix), &network{ctrlr: c})
	if err != nil && err != datastore.ErrKeyNotFound {
		return nil, fmt.Errorf("failed to get networks for scope %s: %v",
			scope, err)
	}

	var found []*network
	for _, obj := range objs {
		nw := obj.(*network)
		nw.ctrlr = c

		// Each network carries a separate endpoint count object in the store.
		cnt := &endpointCnt{n: nw}
		if getErr := scopeStore.GetObject(datastore.Key(cnt.Key()...), cnt); getErr != nil {
			return nil, fmt.Errorf("could not find endpoint count key %s for network %s while listing: %v", datastore.Key(cnt.Key()...), nw.Name(), getErr)
		}
		nw.epCnt = cnt

		found = append(found, nw)
	}

	return found, nil
}
func (c *controller) getNetworksFromStore() ([]*network, error) {
var nl []*network