diff --git a/libnetwork/controller.go b/libnetwork/controller.go index 4980849536..2b07705627 100644 --- a/libnetwork/controller.go +++ b/libnetwork/controller.go @@ -191,6 +191,8 @@ func New(cfgOptions ...config.Option) (NetworkController, error) { return nil, err } + c.sandboxCleanup() + if err := c.startExternalKeyListener(); err != nil { return nil, err } @@ -498,6 +500,18 @@ func (c *controller) NewSandbox(containerID string, options ...SandboxOption) (S c.Lock() c.sandboxes[sb.id] = sb c.Unlock() + defer func() { + if err != nil { + c.Lock() + delete(c.sandboxes, sb.id) + c.Unlock() + } + }() + + err = sb.storeUpdate() + if err != nil { + return nil, fmt.Errorf("updating the store state of sandbox failed: %v", err) + } return sb, nil } diff --git a/libnetwork/endpoint.go b/libnetwork/endpoint.go index 3fcc1f26aa..684df12c34 100644 --- a/libnetwork/endpoint.go +++ b/libnetwork/endpoint.go @@ -221,7 +221,7 @@ func (ep *endpoint) Exists() bool { } func (ep *endpoint) Skip() bool { - return ep.getNetwork().Skip() || ep.DataScope() == datastore.LocalScope + return ep.getNetwork().Skip() } func (ep *endpoint) processOptions(options ...EndpointOption) { diff --git a/libnetwork/libnetwork_test.go b/libnetwork/libnetwork_test.go index e50b7c09a2..8e40c05f5d 100644 --- a/libnetwork/libnetwork_test.go +++ b/libnetwork/libnetwork_test.go @@ -1479,21 +1479,11 @@ func TestLeaveAll(t *testing.T) { if err != nil { t.Fatal(err) } - defer func() { - if err := ep1.Delete(); err != nil { - t.Fatal(err) - } - }() ep2, err := n.CreateEndpoint("ep2") if err != nil { t.Fatal(err) } - defer func() { - if err := ep2.Delete(); err != nil { - t.Fatal(err) - } - }() cnt, err := controller.NewSandbox("leaveall") if err != nil { @@ -1607,21 +1597,11 @@ func TestEndpointUpdateParent(t *testing.T) { if err != nil { t.Fatal(err) } - defer func() { - if err := ep1.Delete(); err != nil { - t.Fatal(err) - } - }() ep2, err := n.CreateEndpoint("ep2") if err != nil { t.Fatal(err) } - defer func() { - if err := ep2.Delete(); err != nil { - t.Fatal(err) - } - }() sbx1, err := controller.NewSandbox(containerID, libnetwork.OptionHostname("test"), @@ -1661,12 +1641,6 @@ func TestEndpointUpdateParent(t *testing.T) { if err != nil { t.Fatal(err) } - - err = ep2.Leave(sbx2) - runtime.LockOSThread() - if err != nil { - t.Fatal(err) - } } func TestEnableIPv6(t *testing.T) { @@ -1714,11 +1688,6 @@ func TestEnableIPv6(t *testing.T) { if err != nil { t.Fatal(err) } - defer func() { - if err := ep1.Delete(); err != nil { - t.Fatal(err) - } - }() if err := ioutil.WriteFile("/etc/resolv.conf", tmpResolvConf, 0644); err != nil { t.Fatal(err) @@ -1741,13 +1710,6 @@ func TestEnableIPv6(t *testing.T) { if err != nil { t.Fatal(err) } - defer func() { - err = ep1.Leave(sb) - runtime.LockOSThread() - if err != nil { - t.Fatal(err) - } - }() content, err := ioutil.ReadFile(resolvConfPath) if err != nil { @@ -1884,11 +1846,6 @@ func TestResolvConf(t *testing.T) { if err != nil { t.Fatal(err) } - defer func() { - if err := ep.Delete(); err != nil { - t.Fatal(err) - } - }() if err := ioutil.WriteFile("/etc/resolv.conf", tmpResolvConf1, 0644); err != nil { t.Fatal(err) @@ -2204,24 +2161,9 @@ func parallelLeave(t *testing.T, rc libnetwork.Sandbox, ep libnetwork.Endpoint, debugf("L%d.", thrNumber) var err error - cid := fmt.Sprintf("%drace", thrNumber) sb := sboxes[thrNumber-1] - if thrNumber == first { - err = ep.Leave(sb) - } else { - err = sb.Delete() - // re add sandbox - defer func() { - if err == nil { - var e error - if sboxes[thrNumber-1], e = controller.NewSandbox(cid); e != nil { - t.Fatalf("Failed to recreate sandbox %s: %v", cid, e) - } - } - }() - } - + err = ep.Leave(sb) runtime.LockOSThread() if err != nil { if _, ok := err.(types.ForbiddenError); !ok { @@ -2324,11 +2266,10 @@ func runParallelTests(t *testing.T, thrNumber int) { debugf("\n") - err = ep.Delete() + err = sb.Delete() if err != nil { t.Fatal(err) } - if thrNumber == first { for thrdone := range done { select { @@ -2337,19 +2278,14 @@ func runParallelTests(t *testing.T, thrNumber int) { } testns.Close() - err = sb.Delete() - if err != nil { - t.Fatal(err) - } - - ep.Delete() - if err != nil { - t.Fatal(err) - } - if err := net2.Delete(); err != nil { t.Fatal(err) } + } else { + err = ep.Delete() + if err != nil { + t.Fatal(err) + } } } diff --git a/libnetwork/sandbox.go b/libnetwork/sandbox.go index eb1941a6ec..b56431112c 100644 --- a/libnetwork/sandbox.go +++ b/libnetwork/sandbox.go @@ -64,6 +64,8 @@ type sandbox struct { endpoints epHeap epPriority map[string]int joinLeaveDone chan struct{} + dbIndex uint64 + dbExists bool sync.Mutex } @@ -153,15 +155,24 @@ func (sb *sandbox) Delete() error { if ep.endpointInGWNetwork() { continue } + if err := ep.Leave(sb); err != nil { log.Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err) } + + if err := ep.Delete(); err != nil { + log.Warnf("Failed deleting endpoint %s: %v\n", ep.ID(), err) + } } if sb.osSbox != nil { sb.osSbox.Destroy() } + if err := sb.storeDelete(); err != nil { + log.Warnf("Failed to delete sandbox %s from store: %v", sb.ID(), err) + } + c.Lock() delete(c.sandboxes, sb.ID()) c.Unlock() @@ -369,7 +380,7 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error { } } } - return nil + return sb.storeUpdate() } func (sb *sandbox) clearNetworkResources(origEp *endpoint) error { @@ -442,7 +453,7 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error { sb.updateGateway(gwepAfter) } - return nil + return sb.storeUpdate() } const ( diff --git a/libnetwork/sandbox_store.go b/libnetwork/sandbox_store.go new file mode 100644 index 0000000000..56abbb6424 --- /dev/null +++ b/libnetwork/sandbox_store.go @@ -0,0 +1,212 @@ +package libnetwork + +import ( + "container/heap" + "encoding/json" + + "github.com/Sirupsen/logrus" + "github.com/docker/libnetwork/datastore" + "github.com/docker/libnetwork/osl" +) + +const ( + sandboxPrefix = "sandbox" +) + +type epState struct { + Eid string + Nid string +} + +type sbState struct { + ID string + Cid string + c *controller + dbIndex uint64 + dbExists bool + Eps []epState +} + +func (sbs *sbState) Key() []string { + return []string{sandboxPrefix, sbs.ID} +} + +func (sbs *sbState) KeyPrefix() []string { + return []string{sandboxPrefix} +} + +func (sbs *sbState) Value() []byte { + b, err := json.Marshal(sbs) + if err != nil { + return nil + } + return b +} + +func (sbs *sbState) SetValue(value []byte) error { + return json.Unmarshal(value, sbs) +} + +func (sbs *sbState) Index() uint64 { + sbi, err := sbs.c.SandboxByID(sbs.ID) + if err != nil { + return sbs.dbIndex + } + + sb := sbi.(*sandbox) + maxIndex := sb.dbIndex + if sbs.dbIndex > maxIndex { + maxIndex = sbs.dbIndex + } + + return maxIndex +} + +func (sbs *sbState) SetIndex(index uint64) { + sbs.dbIndex = index + sbs.dbExists = true + + sbi, err := sbs.c.SandboxByID(sbs.ID) + if err != nil { + return + } + + sb := sbi.(*sandbox) + sb.dbIndex = index + sb.dbExists = true +} + +func (sbs *sbState) Exists() bool { + if sbs.dbExists { + return sbs.dbExists + } + + sbi, err := sbs.c.SandboxByID(sbs.ID) + if err != nil { + return false + } + + sb := sbi.(*sandbox) + return sb.dbExists +} + +func (sbs *sbState) Skip() bool { + return false +} + +func (sbs *sbState) New() datastore.KVObject { + return &sbState{c: sbs.c} +} + +func (sbs *sbState) CopyTo(o datastore.KVObject) error { + dstSbs := o.(*sbState) + dstSbs.c = sbs.c + dstSbs.ID = sbs.ID + dstSbs.Cid = sbs.Cid + dstSbs.dbIndex = sbs.dbIndex + dstSbs.dbExists = sbs.dbExists + + for _, eps := range sbs.Eps { + dstSbs.Eps = append(dstSbs.Eps, eps) + } + + return nil +} + +func (sbs *sbState) DataScope() string { + return datastore.LocalScope +} + +func (sb *sandbox) storeUpdate() error { + sbs := &sbState{ + c: sb.controller, + ID: sb.id, + } + + for _, ep := range sb.getConnectedEndpoints() { + eps := epState{ + Nid: ep.getNetwork().ID(), + Eid: ep.ID(), + } + + sbs.Eps = append(sbs.Eps, eps) + } + + return sb.controller.updateToStore(sbs) +} + +func (sb *sandbox) storeDelete() error { + sbs := &sbState{ + c: sb.controller, + ID: sb.id, + Cid: sb.containerID, + dbIndex: sb.dbIndex, + dbExists: sb.dbExists, + } + + return sb.controller.deleteFromStore(sbs) +} + +func (c *controller) sandboxCleanup() { + store := c.getStore(datastore.LocalScope) + if store == nil { + logrus.Errorf("Could not find local scope store while trying to cleanup sandboxes") + return + } + + kvol, err := store.List(datastore.Key(sandboxPrefix), &sbState{c: c}) + if err != nil && err != datastore.ErrKeyNotFound { + logrus.Errorf("failed to get sandboxes for scope %s: %v", store.Scope(), err) + return + } + + // It's normal for no sandboxes to be found. Just bail out. + if err == datastore.ErrKeyNotFound { + return + } + + for _, kvo := range kvol { + sbs := kvo.(*sbState) + + logrus.Printf("sandboxcleanup sbs = %+v", sbs) + sb := &sandbox{ + id: sbs.ID, + controller: sbs.c, + containerID: sbs.Cid, + endpoints: epHeap{}, + epPriority: map[string]int{}, + dbIndex: sbs.dbIndex, + dbExists: true, + } + + sb.osSbox, err = osl.NewSandbox(sb.Key(), true) + if err != nil { + logrus.Errorf("failed to create new osl sandbox while trying to build sandbox for cleanup: %v", err) + continue + } + + for _, eps := range sbs.Eps { + n, err := c.getNetworkFromStore(eps.Nid) + if err != nil { + logrus.Errorf("getNetworkFromStore for nid %s failed while trying to build sandbox for cleanup: %v", eps.Nid, err) + continue + } + + ep, err := n.getEndpointFromStore(eps.Eid) + if err != nil { + logrus.Errorf("getEndpointFromStore for eid %s failed while trying to build sandbox for cleanup: %v", eps.Eid, err) + continue + } + + heap.Push(&sb.endpoints, ep) + } + + c.Lock() + c.sandboxes[sb.id] = sb + c.Unlock() + + if err := sb.Delete(); err != nil { + logrus.Errorf("failed to delete sandbox %s while trying to cleanup: %v", sb.id, err) + } + } +} diff --git a/libnetwork/store_test.go b/libnetwork/store_test.go index eb49535150..b435a788d2 100644 --- a/libnetwork/store_test.go +++ b/libnetwork/store_test.go @@ -76,8 +76,8 @@ func testLocalBackend(t *testing.T, provider, url string, storeConfig *store.Con if exists, err := store.Exists(datastore.Key(datastore.NetworkKeyPrefix, string(nw.ID()))); !exists || err != nil { t.Fatalf("Network key should have been created.") } - if exists, err := store.Exists(datastore.Key([]string{datastore.EndpointKeyPrefix, string(nw.ID()), string(ep.ID())}...)); exists || err != nil { - t.Fatalf("Endpoint key shouldn't have been created.") + if exists, err := store.Exists(datastore.Key([]string{datastore.EndpointKeyPrefix, string(nw.ID()), string(ep.ID())}...)); !exists || err != nil { + t.Fatalf("Endpoint key should have been created.") } ctrl.(*controller).getStore(datastore.LocalScope).Close() diff --git a/libnetwork/test/integration/dnet/bridge.bats b/libnetwork/test/integration/dnet/bridge.bats index 12c265f0ae..ac07e9b3d2 100644 --- a/libnetwork/test/integration/dnet/bridge.bats +++ b/libnetwork/test/integration/dnet/bridge.bats @@ -30,6 +30,10 @@ function test_single_network_connectivity() { done done + if [ -n "$3" ]; then + return + fi + # Teardown the container connections and the network for i in `seq ${start} ${end}`; do @@ -70,6 +74,27 @@ function test_single_network_connectivity() { dnet_cmd $(inst_id2port 1) network rm singlehost } +@test "Test bridge network dnet ungraceful restart" { + skip_for_circleci + + echo $(docker ps) + dnet_cmd $(inst_id2port 1) network create -d bridge singlehost + + for iter in `seq 1 2`; + do + if [ "$iter" -eq 1 ]; then + test_single_network_connectivity singlehost 3 skip + else + test_single_network_connectivity singlehost 3 + fi + + docker restart dnet-1-bridge + sleep 5 + done + + dnet_cmd $(inst_id2port 1) network rm singlehost +} + @test "Test multiple bridge networks" { skip_for_circleci