mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
96d819cb06
When the daemon has a lot of containers and even when the daemon tries to give 15 second to stop all containers it is not enough. So the daemon forces a shut down at the end of 15 seconds. And hence in a situation with a lot of containers even gracefully bringing down the daemon will result in a lot of containers fully not brought down. In addition to this the daemon force killing itself can happen in any arbitrary point in time which will result in inconsistent checkpointed state for the sandbox. This makes the cleanup really fail when we come back up and in many cases because of this inability to cleanup properly on restart will result in daemon not able to restart because we are not able to delete the default network. This commit ensures that the sandbox state stored in the disk is never inconsistent so that when we come back up we will always be able to cleanup the sandbox state. Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
222 lines
4.3 KiB
Go
222 lines
4.3 KiB
Go
package libnetwork
|
|
|
|
import (
|
|
"container/heap"
|
|
"encoding/json"
|
|
|
|
"github.com/Sirupsen/logrus"
|
|
"github.com/docker/libnetwork/datastore"
|
|
"github.com/docker/libnetwork/osl"
|
|
)
|
|
|
|
const (
|
|
sandboxPrefix = "sandbox"
|
|
)
|
|
|
|
type epState struct {
|
|
Eid string
|
|
Nid string
|
|
}
|
|
|
|
type sbState struct {
|
|
ID string
|
|
Cid string
|
|
c *controller
|
|
dbIndex uint64
|
|
dbExists bool
|
|
Eps []epState
|
|
}
|
|
|
|
func (sbs *sbState) Key() []string {
|
|
return []string{sandboxPrefix, sbs.ID}
|
|
}
|
|
|
|
func (sbs *sbState) KeyPrefix() []string {
|
|
return []string{sandboxPrefix}
|
|
}
|
|
|
|
func (sbs *sbState) Value() []byte {
|
|
b, err := json.Marshal(sbs)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
return b
|
|
}
|
|
|
|
func (sbs *sbState) SetValue(value []byte) error {
|
|
return json.Unmarshal(value, sbs)
|
|
}
|
|
|
|
func (sbs *sbState) Index() uint64 {
|
|
sbi, err := sbs.c.SandboxByID(sbs.ID)
|
|
if err != nil {
|
|
return sbs.dbIndex
|
|
}
|
|
|
|
sb := sbi.(*sandbox)
|
|
maxIndex := sb.dbIndex
|
|
if sbs.dbIndex > maxIndex {
|
|
maxIndex = sbs.dbIndex
|
|
}
|
|
|
|
return maxIndex
|
|
}
|
|
|
|
func (sbs *sbState) SetIndex(index uint64) {
|
|
sbs.dbIndex = index
|
|
sbs.dbExists = true
|
|
|
|
sbi, err := sbs.c.SandboxByID(sbs.ID)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
sb := sbi.(*sandbox)
|
|
sb.dbIndex = index
|
|
sb.dbExists = true
|
|
}
|
|
|
|
func (sbs *sbState) Exists() bool {
|
|
if sbs.dbExists {
|
|
return sbs.dbExists
|
|
}
|
|
|
|
sbi, err := sbs.c.SandboxByID(sbs.ID)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
|
|
sb := sbi.(*sandbox)
|
|
return sb.dbExists
|
|
}
|
|
|
|
func (sbs *sbState) Skip() bool {
|
|
return false
|
|
}
|
|
|
|
func (sbs *sbState) New() datastore.KVObject {
|
|
return &sbState{c: sbs.c}
|
|
}
|
|
|
|
func (sbs *sbState) CopyTo(o datastore.KVObject) error {
|
|
dstSbs := o.(*sbState)
|
|
dstSbs.c = sbs.c
|
|
dstSbs.ID = sbs.ID
|
|
dstSbs.Cid = sbs.Cid
|
|
dstSbs.dbIndex = sbs.dbIndex
|
|
dstSbs.dbExists = sbs.dbExists
|
|
|
|
for _, eps := range sbs.Eps {
|
|
dstSbs.Eps = append(dstSbs.Eps, eps)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (sbs *sbState) DataScope() string {
|
|
return datastore.LocalScope
|
|
}
|
|
|
|
func (sb *sandbox) storeUpdate() error {
|
|
sbs := &sbState{
|
|
c: sb.controller,
|
|
ID: sb.id,
|
|
}
|
|
|
|
retry:
|
|
sbs.Eps = nil
|
|
for _, ep := range sb.getConnectedEndpoints() {
|
|
eps := epState{
|
|
Nid: ep.getNetwork().ID(),
|
|
Eid: ep.ID(),
|
|
}
|
|
|
|
sbs.Eps = append(sbs.Eps, eps)
|
|
}
|
|
|
|
err := sb.controller.updateToStore(sbs)
|
|
if err == datastore.ErrKeyModified {
|
|
// When we get ErrKeyModified it is sufficient to just
|
|
// go back and retry. No need to get the object from
|
|
// the store because we always regenerate the store
|
|
// state from in memory sandbox state
|
|
goto retry
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (sb *sandbox) storeDelete() error {
|
|
sbs := &sbState{
|
|
c: sb.controller,
|
|
ID: sb.id,
|
|
Cid: sb.containerID,
|
|
dbIndex: sb.dbIndex,
|
|
dbExists: sb.dbExists,
|
|
}
|
|
|
|
return sb.controller.deleteFromStore(sbs)
|
|
}
|
|
|
|
func (c *controller) sandboxCleanup() {
|
|
store := c.getStore(datastore.LocalScope)
|
|
if store == nil {
|
|
logrus.Errorf("Could not find local scope store while trying to cleanup sandboxes")
|
|
return
|
|
}
|
|
|
|
kvol, err := store.List(datastore.Key(sandboxPrefix), &sbState{c: c})
|
|
if err != nil && err != datastore.ErrKeyNotFound {
|
|
logrus.Errorf("failed to get sandboxes for scope %s: %v", store.Scope(), err)
|
|
return
|
|
}
|
|
|
|
// It's normal for no sandboxes to be found. Just bail out.
|
|
if err == datastore.ErrKeyNotFound {
|
|
return
|
|
}
|
|
|
|
for _, kvo := range kvol {
|
|
sbs := kvo.(*sbState)
|
|
|
|
sb := &sandbox{
|
|
id: sbs.ID,
|
|
controller: sbs.c,
|
|
containerID: sbs.Cid,
|
|
endpoints: epHeap{},
|
|
epPriority: map[string]int{},
|
|
dbIndex: sbs.dbIndex,
|
|
dbExists: true,
|
|
}
|
|
|
|
sb.osSbox, err = osl.NewSandbox(sb.Key(), true)
|
|
if err != nil {
|
|
logrus.Errorf("failed to create new osl sandbox while trying to build sandbox for cleanup: %v", err)
|
|
continue
|
|
}
|
|
|
|
for _, eps := range sbs.Eps {
|
|
n, err := c.getNetworkFromStore(eps.Nid)
|
|
if err != nil {
|
|
logrus.Errorf("getNetworkFromStore for nid %s failed while trying to build sandbox for cleanup: %v", eps.Nid, err)
|
|
continue
|
|
}
|
|
|
|
ep, err := n.getEndpointFromStore(eps.Eid)
|
|
if err != nil {
|
|
logrus.Errorf("getEndpointFromStore for eid %s failed while trying to build sandbox for cleanup: %v", eps.Eid, err)
|
|
continue
|
|
}
|
|
|
|
heap.Push(&sb.endpoints, ep)
|
|
}
|
|
|
|
c.Lock()
|
|
c.sandboxes[sb.id] = sb
|
|
c.Unlock()
|
|
|
|
if err := sb.Delete(); err != nil {
|
|
logrus.Errorf("failed to delete sandbox %s while trying to cleanup: %v", sb.id, err)
|
|
}
|
|
}
|
|
}
|