1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Cleanup dangling sandboxes on boot up

Currently when docker exits ungracefully it may leave
dangling sandboxes which may hold onto precious network
resources. Added checkpoint state for sandboxes which
on boot up will be used to clean up the sandboxes and
network resources.

On bootup the remaining dangling state in the checkpoint
are read and cleaned up before accepting any new
network allocation requests.

Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
This commit is contained in:
Jana Radhakrishnan 2015-10-07 20:01:38 -07:00
parent 2e43b58b95
commit e41b4765bd
7 changed files with 274 additions and 76 deletions

View file

@ -191,6 +191,8 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
return nil, err
}
c.sandboxCleanup()
if err := c.startExternalKeyListener(); err != nil {
return nil, err
}
@ -500,6 +502,18 @@ func (c *controller) NewSandbox(containerID string, options ...SandboxOption) (S
c.Lock()
c.sandboxes[sb.id] = sb
c.Unlock()
defer func() {
if err != nil {
c.Lock()
delete(c.sandboxes, sb.id)
c.Unlock()
}
}()
err = sb.storeUpdate()
if err != nil {
return nil, fmt.Errorf("updating the store state of sandbox failed: %v", err)
}
return sb, nil
}

View file

@ -221,7 +221,7 @@ func (ep *endpoint) Exists() bool {
}
func (ep *endpoint) Skip() bool {
return ep.getNetwork().Skip() || ep.DataScope() == datastore.LocalScope
return ep.getNetwork().Skip()
}
func (ep *endpoint) processOptions(options ...EndpointOption) {

View file

@ -1479,21 +1479,11 @@ func TestLeaveAll(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer func() {
if err := ep1.Delete(); err != nil {
t.Fatal(err)
}
}()
ep2, err := n.CreateEndpoint("ep2")
if err != nil {
t.Fatal(err)
}
defer func() {
if err := ep2.Delete(); err != nil {
t.Fatal(err)
}
}()
cnt, err := controller.NewSandbox("leaveall")
if err != nil {
@ -1607,21 +1597,11 @@ func TestEndpointUpdateParent(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer func() {
if err := ep1.Delete(); err != nil {
t.Fatal(err)
}
}()
ep2, err := n.CreateEndpoint("ep2")
if err != nil {
t.Fatal(err)
}
defer func() {
if err := ep2.Delete(); err != nil {
t.Fatal(err)
}
}()
sbx1, err := controller.NewSandbox(containerID,
libnetwork.OptionHostname("test"),
@ -1661,12 +1641,6 @@ func TestEndpointUpdateParent(t *testing.T) {
if err != nil {
t.Fatal(err)
}
err = ep2.Leave(sbx2)
runtime.LockOSThread()
if err != nil {
t.Fatal(err)
}
}
func TestEnableIPv6(t *testing.T) {
@ -1714,11 +1688,6 @@ func TestEnableIPv6(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer func() {
if err := ep1.Delete(); err != nil {
t.Fatal(err)
}
}()
if err := ioutil.WriteFile("/etc/resolv.conf", tmpResolvConf, 0644); err != nil {
t.Fatal(err)
@ -1741,13 +1710,6 @@ func TestEnableIPv6(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer func() {
err = ep1.Leave(sb)
runtime.LockOSThread()
if err != nil {
t.Fatal(err)
}
}()
content, err := ioutil.ReadFile(resolvConfPath)
if err != nil {
@ -1884,11 +1846,6 @@ func TestResolvConf(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer func() {
if err := ep.Delete(); err != nil {
t.Fatal(err)
}
}()
if err := ioutil.WriteFile("/etc/resolv.conf", tmpResolvConf1, 0644); err != nil {
t.Fatal(err)
@ -2204,24 +2161,9 @@ func parallelLeave(t *testing.T, rc libnetwork.Sandbox, ep libnetwork.Endpoint,
debugf("L%d.", thrNumber)
var err error
cid := fmt.Sprintf("%drace", thrNumber)
sb := sboxes[thrNumber-1]
if thrNumber == first {
err = ep.Leave(sb)
} else {
err = sb.Delete()
// re add sandbox
defer func() {
if err == nil {
var e error
if sboxes[thrNumber-1], e = controller.NewSandbox(cid); e != nil {
t.Fatalf("Failed to recreate sandbox %s: %v", cid, e)
}
}
}()
}
err = ep.Leave(sb)
runtime.LockOSThread()
if err != nil {
if _, ok := err.(types.ForbiddenError); !ok {
@ -2324,11 +2266,10 @@ func runParallelTests(t *testing.T, thrNumber int) {
debugf("\n")
err = ep.Delete()
err = sb.Delete()
if err != nil {
t.Fatal(err)
}
if thrNumber == first {
for thrdone := range done {
select {
@ -2337,19 +2278,14 @@ func runParallelTests(t *testing.T, thrNumber int) {
}
testns.Close()
err = sb.Delete()
if err != nil {
t.Fatal(err)
}
ep.Delete()
if err != nil {
t.Fatal(err)
}
if err := net2.Delete(); err != nil {
t.Fatal(err)
}
} else {
err = ep.Delete()
if err != nil {
t.Fatal(err)
}
}
}

View file

@ -64,6 +64,8 @@ type sandbox struct {
endpoints epHeap
epPriority map[string]int
joinLeaveDone chan struct{}
dbIndex uint64
dbExists bool
sync.Mutex
}
@ -153,15 +155,24 @@ func (sb *sandbox) Delete() error {
if ep.endpointInGWNetwork() {
continue
}
if err := ep.Leave(sb); err != nil {
log.Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err)
}
if err := ep.Delete(); err != nil {
log.Warnf("Failed deleting endpoint %s: %v\n", ep.ID(), err)
}
}
if sb.osSbox != nil {
sb.osSbox.Destroy()
}
if err := sb.storeDelete(); err != nil {
log.Warnf("Failed to delete sandbox %s from store: %v", sb.ID(), err)
}
c.Lock()
delete(c.sandboxes, sb.ID())
c.Unlock()
@ -369,7 +380,7 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
}
}
}
return nil
return sb.storeUpdate()
}
func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
@ -442,7 +453,7 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
sb.updateGateway(gwepAfter)
}
return nil
return sb.storeUpdate()
}
const (

212
libnetwork/sandbox_store.go Normal file
View file

@ -0,0 +1,212 @@
package libnetwork
import (
"container/heap"
"encoding/json"
"github.com/Sirupsen/logrus"
"github.com/docker/libnetwork/datastore"
"github.com/docker/libnetwork/osl"
)
const (
sandboxPrefix = "sandbox"
)
type epState struct {
Eid string
Nid string
}
type sbState struct {
ID string
Cid string
c *controller
dbIndex uint64
dbExists bool
Eps []epState
}
func (sbs *sbState) Key() []string {
return []string{sandboxPrefix, sbs.ID}
}
func (sbs *sbState) KeyPrefix() []string {
return []string{sandboxPrefix}
}
func (sbs *sbState) Value() []byte {
b, err := json.Marshal(sbs)
if err != nil {
return nil
}
return b
}
func (sbs *sbState) SetValue(value []byte) error {
return json.Unmarshal(value, sbs)
}
func (sbs *sbState) Index() uint64 {
sbi, err := sbs.c.SandboxByID(sbs.ID)
if err != nil {
return sbs.dbIndex
}
sb := sbi.(*sandbox)
maxIndex := sb.dbIndex
if sbs.dbIndex > maxIndex {
maxIndex = sbs.dbIndex
}
return maxIndex
}
func (sbs *sbState) SetIndex(index uint64) {
sbs.dbIndex = index
sbs.dbExists = true
sbi, err := sbs.c.SandboxByID(sbs.ID)
if err != nil {
return
}
sb := sbi.(*sandbox)
sb.dbIndex = index
sb.dbExists = true
}
func (sbs *sbState) Exists() bool {
if sbs.dbExists {
return sbs.dbExists
}
sbi, err := sbs.c.SandboxByID(sbs.ID)
if err != nil {
return false
}
sb := sbi.(*sandbox)
return sb.dbExists
}
func (sbs *sbState) Skip() bool {
return false
}
func (sbs *sbState) New() datastore.KVObject {
return &sbState{c: sbs.c}
}
func (sbs *sbState) CopyTo(o datastore.KVObject) error {
dstSbs := o.(*sbState)
dstSbs.c = sbs.c
dstSbs.ID = sbs.ID
dstSbs.Cid = sbs.Cid
dstSbs.dbIndex = sbs.dbIndex
dstSbs.dbExists = sbs.dbExists
for _, eps := range sbs.Eps {
dstSbs.Eps = append(dstSbs.Eps, eps)
}
return nil
}
func (sbs *sbState) DataScope() string {
return datastore.LocalScope
}
func (sb *sandbox) storeUpdate() error {
sbs := &sbState{
c: sb.controller,
ID: sb.id,
}
for _, ep := range sb.getConnectedEndpoints() {
eps := epState{
Nid: ep.getNetwork().ID(),
Eid: ep.ID(),
}
sbs.Eps = append(sbs.Eps, eps)
}
return sb.controller.updateToStore(sbs)
}
func (sb *sandbox) storeDelete() error {
sbs := &sbState{
c: sb.controller,
ID: sb.id,
Cid: sb.containerID,
dbIndex: sb.dbIndex,
dbExists: sb.dbExists,
}
return sb.controller.deleteFromStore(sbs)
}
func (c *controller) sandboxCleanup() {
store := c.getStore(datastore.LocalScope)
if store == nil {
logrus.Errorf("Could not find local scope store while trying to cleanup sandboxes")
return
}
kvol, err := store.List(datastore.Key(sandboxPrefix), &sbState{c: c})
if err != nil && err != datastore.ErrKeyNotFound {
logrus.Errorf("failed to get sandboxes for scope %s: %v", store.Scope(), err)
return
}
// It's normal for no sandboxes to be found. Just bail out.
if err == datastore.ErrKeyNotFound {
return
}
for _, kvo := range kvol {
sbs := kvo.(*sbState)
logrus.Printf("sandboxcleanup sbs = %+v", sbs)
sb := &sandbox{
id: sbs.ID,
controller: sbs.c,
containerID: sbs.Cid,
endpoints: epHeap{},
epPriority: map[string]int{},
dbIndex: sbs.dbIndex,
dbExists: true,
}
sb.osSbox, err = osl.NewSandbox(sb.Key(), true)
if err != nil {
logrus.Errorf("failed to create new osl sandbox while trying to build sandbox for cleanup: %v", err)
continue
}
for _, eps := range sbs.Eps {
n, err := c.getNetworkFromStore(eps.Nid)
if err != nil {
logrus.Errorf("getNetworkFromStore for nid %s failed while trying to build sandbox for cleanup: %v", eps.Nid, err)
continue
}
ep, err := n.getEndpointFromStore(eps.Eid)
if err != nil {
logrus.Errorf("getEndpointFromStore for eid %s failed while trying to build sandbox for cleanup: %v", eps.Eid, err)
continue
}
heap.Push(&sb.endpoints, ep)
}
c.Lock()
c.sandboxes[sb.id] = sb
c.Unlock()
if err := sb.Delete(); err != nil {
logrus.Errorf("failed to delete sandbox %s while trying to cleanup: %v", sb.id, err)
}
}
}

View file

@ -76,8 +76,8 @@ func testLocalBackend(t *testing.T, provider, url string, storeConfig *store.Con
if exists, err := store.Exists(datastore.Key(datastore.NetworkKeyPrefix, string(nw.ID()))); !exists || err != nil {
t.Fatalf("Network key should have been created.")
}
if exists, err := store.Exists(datastore.Key([]string{datastore.EndpointKeyPrefix, string(nw.ID()), string(ep.ID())}...)); exists || err != nil {
t.Fatalf("Endpoint key shouldn't have been created.")
if exists, err := store.Exists(datastore.Key([]string{datastore.EndpointKeyPrefix, string(nw.ID()), string(ep.ID())}...)); !exists || err != nil {
t.Fatalf("Endpoint key should have been created.")
}
ctrl.(*controller).getStore(datastore.LocalScope).Close()

View file

@ -30,6 +30,10 @@ function test_single_network_connectivity() {
done
done
if [ -n "$3" ]; then
return
fi
# Teardown the container connections and the network
for i in `seq ${start} ${end}`;
do
@ -70,6 +74,27 @@ function test_single_network_connectivity() {
dnet_cmd $(inst_id2port 1) network rm singlehost
}
@test "Test bridge network dnet ungraceful restart" {
skip_for_circleci
echo $(docker ps)
dnet_cmd $(inst_id2port 1) network create -d bridge singlehost
for iter in `seq 1 2`;
do
if [ "$iter" -eq 1 ]; then
test_single_network_connectivity singlehost 3 skip
else
test_single_network_connectivity singlehost 3
fi
docker restart dnet-1-bridge
sleep 5
done
dnet_cmd $(inst_id2port 1) network rm singlehost
}
@test "Test multiple bridge networks" {
skip_for_circleci