Merge pull request #675 from mrjana/model

Make sandbox cleanup robust for ungraceful exits
This commit is contained in:
Madhu Venugopal 2015-10-19 22:47:22 +02:00
commit 9145f18132
4 changed files with 75 additions and 7 deletions

View File

@ -66,6 +66,7 @@ type sandbox struct {
joinLeaveDone chan struct{}
dbIndex uint64
dbExists bool
inDelete bool
sync.Mutex
}
@ -146,6 +147,22 @@ func (sb *sandbox) Statistics() (map[string]*types.InterfaceStatistics, error) {
}
func (sb *sandbox) Delete() error {
sb.Lock()
if sb.inDelete {
sb.Unlock()
return types.ForbiddenErrorf("another sandbox delete in progress")
}
// Set the inDelete flag. This ensures that we do not update
// the store until all the endpoint leaves and deletes have
// completed; only then do we finally delete the sandbox
// object altogether from the data store. This way, if the
// daemon exits ungracefully in the middle of a sandbox
// delete, we will still have all the references to the
// endpoints in the sandbox, so that we can clean them up
// when we restart.
sb.inDelete = true
sb.Unlock()
c := sb.controller
// Detach from all endpoints
@ -355,6 +372,10 @@ func releaseOSSboxResources(osSbox osl.Sandbox, ep *endpoint) {
joinInfo := ep.joinInfo
ep.Unlock()
if joinInfo == nil {
return
}
// Remove non-interface routes.
for _, r := range joinInfo.StaticRoutes {
if err := osSbox.RemoveStaticRoute(r); err != nil {
@ -386,6 +407,7 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
sb.Unlock()
return nil
}
inDelete := sb.inDelete
sb.Unlock()
ep.Lock()
@ -425,7 +447,16 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
}
}
}
return sb.storeUpdate()
// Only update the store if we did not come here as part of
// sandbox delete. If we came here as part of delete then do
// not bother updating the store. The sandbox object will be
// deleted anyway
if !inDelete {
return sb.storeUpdate()
}
return nil
}
func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
@ -437,6 +468,7 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
sb.Lock()
osSbox := sb.osSbox
inDelete := sb.inDelete
sb.Unlock()
if osSbox != nil {
releaseOSSboxResources(osSbox, ep)
@ -480,7 +512,15 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
sb.updateGateway(gwepAfter)
}
return sb.storeUpdate()
// Only update the store if we did not come here as part of
// sandbox delete. If we came here as part of delete then do
// not bother updating the store. The sandbox object will be
// deleted anyway
if !inDelete {
return sb.storeUpdate()
}
return nil
}
const (

View File

@ -123,6 +123,8 @@ func (sb *sandbox) storeUpdate() error {
ID: sb.id,
}
retry:
sbs.Eps = nil
for _, ep := range sb.getConnectedEndpoints() {
eps := epState{
Nid: ep.getNetwork().ID(),
@ -132,7 +134,16 @@ func (sb *sandbox) storeUpdate() error {
sbs.Eps = append(sbs.Eps, eps)
}
return sb.controller.updateToStore(sbs)
err := sb.controller.updateToStore(sbs)
if err == datastore.ErrKeyModified {
// When we get ErrKeyModified it is sufficient to just
// go back and retry. No need to get the object from
// the store because we always regenerate the store
// state from in memory sandbox state
goto retry
}
return err
}
func (sb *sandbox) storeDelete() error {

View File

@ -64,6 +64,23 @@ function test_single_network_connectivity() {
done
}
# Runs the single-network connectivity check on the "bridge" network
# twice, restarting the dnet-1-bridge container between the two passes.
@test "Test default network dnet ungraceful restart" {
skip_for_circleci
# Print the container list so it shows up in the test output.
echo $(docker ps)
for iter in `seq 1 2`;
do
if [ "$iter" -eq 1 ]; then
# First pass: the extra "skip" argument is handed to the helper.
# NOTE(review): presumably it makes the helper skip its cleanup so
# state is left behind across the restart - confirm against
# test_single_network_connectivity.
test_single_network_connectivity bridge 3 skip
# Restart the dnet container, then wait until it is reachable
# again before starting the second pass.
docker restart dnet-1-bridge
wait_for_dnet $(inst_id2port 1) dnet-1-bridge
else
# Second pass: same check after the restart, without "skip".
test_single_network_connectivity bridge 3
fi
done
}
@test "Test bridge network" {
skip_for_circleci

View File

@ -99,7 +99,7 @@ function run_dnet_tests() {
./integration-tmp/bin/bats ./test/integration/dnet/dnet.bats
}
function run_simple_tests() {
function run_simple_consul_tests() {
# Test a single node configuration with a global scope test driver
## Setup
start_dnet 1 simple 1>>${INTEGRATION_ROOT}/test.log 2>&1
@ -205,15 +205,15 @@ if [ -z "$SUITES" ]; then
then
# We can only run a limited list of suites in circleci because of the
# old kernel and limited docker environment.
suites="dnet simple multi_consul multi_zk multi_etcd"
suites="dnet simple_consul multi_consul multi_zk multi_etcd"
else
suites="dnet simple multi_consul multi_zk multi_etcd bridge overlay_consul overlay_zk overlay_etcd"
suites="dnet simple_consul multi_consul multi_zk multi_etcd bridge overlay_consul overlay_zk overlay_etcd"
fi
else
suites="$SUITES"
fi
if [[ "$suites" =~ .*consul.* ]]; then
if [[ ( "$suites" =~ .*consul.* ) || ( "$suites" =~ .*bridge.* ) ]]; then
echo "Starting consul ..."
start_consul 1>>${INTEGRATION_ROOT}/test.log 2>&1
cmap[pr_consul]=pr_consul