2016-03-30 17:42:58 -04:00
|
|
|
package libnetwork
|
|
|
|
|
2021-04-05 20:24:47 -04:00
|
|
|
//go:generate protoc -I.:Godeps/_workspace/src/github.com/gogo/protobuf --gogo_out=import_path=github.com/docker/docker/libnetwork,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. agent.proto
|
2016-05-17 17:12:39 -04:00
|
|
|
|
2016-03-30 17:42:58 -04:00
|
|
|
import (
|
2017-01-31 12:13:08 -05:00
|
|
|
"encoding/json"
|
2016-03-30 17:42:58 -04:00
|
|
|
"fmt"
|
|
|
|
"net"
|
2016-06-04 05:10:19 -04:00
|
|
|
"sort"
|
2016-11-22 02:38:03 -05:00
|
|
|
"sync"
|
2016-03-30 17:42:58 -04:00
|
|
|
|
|
|
|
"github.com/docker/go-events"
|
2021-04-05 20:24:47 -04:00
|
|
|
"github.com/docker/docker/libnetwork/cluster"
|
|
|
|
"github.com/docker/docker/libnetwork/datastore"
|
|
|
|
"github.com/docker/docker/libnetwork/discoverapi"
|
|
|
|
"github.com/docker/docker/libnetwork/driverapi"
|
|
|
|
"github.com/docker/docker/libnetwork/networkdb"
|
|
|
|
"github.com/docker/docker/libnetwork/types"
|
2016-05-17 17:12:39 -04:00
|
|
|
"github.com/gogo/protobuf/proto"
|
2017-07-26 17:18:31 -04:00
|
|
|
"github.com/sirupsen/logrus"
|
2016-03-30 17:42:58 -04:00
|
|
|
)
|
|
|
|
|
2016-06-11 07:50:25 -04:00
|
|
|
// Subsystem identifiers and keyring sizing for cluster encryption-key
// management.
const (
	// subsysGossip tags encryption keys used by the gossip (networkdb)
	// control plane.
	subsysGossip = "networking:gossip"
	// subsysIPSec tags encryption keys used for IPSec datapath encryption.
	subsysIPSec = "networking:ipsec"
	// keyringSize is the number of keys kept per subsystem (presumably
	// previous/current/next during rotation — confirm with the key manager).
	keyringSize = 3
)
|
|
|
|
|
2016-06-04 05:10:19 -04:00
|
|
|
// ByTime implements sort.Interface for []*types.EncryptionKey based on
|
|
|
|
// the LamportTime field.
|
|
|
|
type ByTime []*types.EncryptionKey
|
|
|
|
|
|
|
|
func (b ByTime) Len() int { return len(b) }
|
|
|
|
func (b ByTime) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
|
|
|
func (b ByTime) Less(i, j int) bool { return b[i].LamportTime < b[j].LamportTime }
|
|
|
|
|
2016-03-30 17:42:58 -04:00
|
|
|
// agent wraps the networkdb instance and the addressing/cleanup state the
// controller needs while participating in a swarm cluster.
type agent struct {
	// networkDB is the gossip-based database carrying cluster-scoped state
	// (endpoint records, driver tables, node membership).
	networkDB *networkdb.NetworkDB
	// bindAddr is the resolved local address the agent is bound to.
	bindAddr string
	// advertiseAddr is the address advertised to other cluster members.
	advertiseAddr string
	// dataPathAddr, when non-empty, overrides advertiseAddr for datapath
	// traffic (see dataPathAddress).
	dataPathAddr string
	// coreCancelFuncs holds cancel functions for the core networkdb table
	// watches; invoked from agentClose.
	coreCancelFuncs []func()
	// driverCancelFuncs holds cancel functions for driver table watches.
	// NOTE(review): the map key is supplied by callers outside this view —
	// presumably a network ID; confirm against the call sites.
	driverCancelFuncs map[string][]func()
	// Mutex guards the cancel-func collections above.
	sync.Mutex
}
|
|
|
|
|
2017-04-12 21:51:01 -04:00
|
|
|
func (a *agent) dataPathAddress() string {
|
|
|
|
a.Lock()
|
|
|
|
defer a.Unlock()
|
|
|
|
if a.dataPathAddr != "" {
|
|
|
|
return a.dataPathAddr
|
|
|
|
}
|
|
|
|
return a.advertiseAddr
|
|
|
|
}
|
|
|
|
|
2017-03-02 02:57:37 -05:00
|
|
|
const libnetworkEPTable = "endpoint_table"
|
|
|
|
|
2016-03-30 17:42:58 -04:00
|
|
|
// getBindAddr returns the first usable (non link-local) unicast address
// configured on the named interface, or an error if none is found.
func getBindAddr(ifaceName string) (string, error) {
	iface, err := net.InterfaceByName(ifaceName)
	if err != nil {
		return "", fmt.Errorf("failed to find interface %s: %v", ifaceName, err)
	}

	addrs, err := iface.Addrs()
	if err != nil {
		return "", fmt.Errorf("failed to get interface addresses: %v", err)
	}

	for _, ifAddr := range addrs {
		ipNet, ok := ifAddr.(*net.IPNet)
		if !ok {
			continue
		}
		ip := ipNet.IP
		// Link-local addresses are not usable for cluster communication.
		if ip.IsLinkLocalUnicast() {
			continue
		}
		return ip.String(), nil
	}

	return "", fmt.Errorf("failed to get bind address")
}
|
|
|
|
|
|
|
|
func resolveAddr(addrOrInterface string) (string, error) {
|
|
|
|
// Try and see if this is a valid IP address
|
|
|
|
if net.ParseIP(addrOrInterface) != nil {
|
|
|
|
return addrOrInterface, nil
|
|
|
|
}
|
|
|
|
|
2016-06-12 13:08:26 -04:00
|
|
|
addr, err := net.ResolveIPAddr("ip", addrOrInterface)
|
|
|
|
if err != nil {
|
|
|
|
// If not a valid IP address, it should be a valid interface
|
|
|
|
return getBindAddr(addrOrInterface)
|
|
|
|
}
|
|
|
|
return addr.String(), nil
|
2016-03-30 17:42:58 -04:00
|
|
|
}
|
|
|
|
|
2016-06-05 01:48:10 -04:00
|
|
|
// handleKeyChange reconciles the controller's encryption keys with the new
// set pushed by the cluster manager, updating the gossip keyring and
// notifying drivers of IPSec key changes.
//
// Ordering matters below: a newly added gossip key is installed with
// SetKey before the primary is switched, and a deleted key is removed
// only after the new primary has been set.
func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
	drvEnc := discoverapi.DriverEncryptionUpdate{}

	a := c.getAgent()
	if a == nil {
		logrus.Debug("Skipping key change as agent is nil")
		return nil
	}

	// Find the deleted key. If the deleted key was the primary key,
	// a new primary key should be set before removing it from the keyring.
	c.Lock()
	added := []byte{}
	deleted := []byte{}
	j := len(c.keys)
	for i := 0; i < j; {
		same := false
		// Keys are matched by Lamport time, not by key bytes.
		for _, key := range keys {
			if same = key.LamportTime == c.keys[i].LamportTime; same {
				break
			}
		}
		if !same {
			cKey := c.keys[i]
			if cKey.Subsystem == subsysGossip {
				deleted = cKey.Key
			}

			if cKey.Subsystem == subsysIPSec {
				drvEnc.Prune = cKey.Key
				drvEnc.PruneTag = cKey.LamportTime
			}
			// Remove c.keys[i] by swapping it with the last live element
			// and shrinking the window.
			c.keys[i], c.keys[j-1] = c.keys[j-1], c.keys[i]
			c.keys[j-1] = nil
			j--
		}
		i++
	}
	c.keys = c.keys[:j]

	// Find the new key and add it to the key ring
	for _, key := range keys {
		same := false
		for _, cKey := range c.keys {
			if same = cKey.LamportTime == key.LamportTime; same {
				break
			}
		}
		if !same {
			c.keys = append(c.keys, key)
			if key.Subsystem == subsysGossip {
				added = key.Key
			}

			if key.Subsystem == subsysIPSec {
				drvEnc.Key = key.Key
				drvEnc.Tag = key.LamportTime
			}
		}
	}
	c.Unlock()

	// Install the new gossip key before switching the primary.
	if len(added) > 0 {
		a.networkDB.SetKey(added)
	}

	key, _, err := c.getPrimaryKeyTag(subsysGossip)
	if err != nil {
		return err
	}
	a.networkDB.SetPrimaryKey(key)

	key, tag, err := c.getPrimaryKeyTag(subsysIPSec)
	if err != nil {
		return err
	}
	drvEnc.Primary = key
	drvEnc.PrimaryTag = tag

	// Remove the deleted gossip key only after the primary switch above.
	if len(deleted) > 0 {
		a.networkDB.RemoveKey(deleted)
	}

	// Push the IPSec key update to every driver; on failure, fall back to
	// pushing the full key configuration.
	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
		err := driver.DiscoverNew(discoverapi.EncryptionKeysUpdate, drvEnc)
		if err != nil {
			logrus.Warnf("Failed to update datapath keys in driver %s: %v", name, err)
			// Attempt to reconfigure keys in case of a update failure
			// which can arise due to a mismatch of keys
			// if worker nodes get temporarily disconnected
			logrus.Warnf("Reconfiguring datapath keys for %s", name)
			drvCfgEnc := discoverapi.DriverEncryptionConfig{}
			drvCfgEnc.Keys, drvCfgEnc.Tags = c.getKeys(subsysIPSec)
			err = driver.DiscoverNew(discoverapi.EncryptionKeysConfig, drvCfgEnc)
			if err != nil {
				logrus.Warnf("Failed to reset datapath keys in driver %s: %v", name, err)
			}
		}
		return false
	})

	return nil
}
|
|
|
|
|
2017-05-25 13:45:38 -04:00
|
|
|
func (c *controller) agentSetup(clusterProvider cluster.Provider) error {
|
2017-05-03 14:18:33 -04:00
|
|
|
agent := c.getAgent()
|
2017-04-18 14:29:25 -04:00
|
|
|
|
2017-05-21 22:25:52 -04:00
|
|
|
// If the agent is already present there is no need to try to initialize it again
|
2017-05-03 14:18:33 -04:00
|
|
|
if agent != nil {
|
|
|
|
return nil
|
2017-04-18 14:29:25 -04:00
|
|
|
}
|
|
|
|
|
2016-07-19 21:17:30 -04:00
|
|
|
bindAddr := clusterProvider.GetLocalAddress()
|
|
|
|
advAddr := clusterProvider.GetAdvertiseAddress()
|
2017-04-12 21:51:01 -04:00
|
|
|
dataAddr := clusterProvider.GetDataPathAddress()
|
2017-04-27 19:58:42 -04:00
|
|
|
remoteList := clusterProvider.GetRemoteAddressList()
|
2017-04-27 19:58:42 -04:00
|
|
|
remoteAddrList := make([]string, 0, len(remoteList))
|
2017-04-27 19:58:42 -04:00
|
|
|
for _, remote := range remoteList {
|
|
|
|
addr, _, _ := net.SplitHostPort(remote)
|
|
|
|
remoteAddrList = append(remoteAddrList, addr)
|
|
|
|
}
|
|
|
|
|
2016-09-22 14:38:35 -04:00
|
|
|
listen := clusterProvider.GetListenAddress()
|
|
|
|
listenAddr, _, _ := net.SplitHostPort(listen)
|
2016-06-05 01:48:10 -04:00
|
|
|
|
2017-07-10 15:05:58 -04:00
|
|
|
logrus.Infof("Initializing Libnetwork Agent Listen-Addr=%s Local-addr=%s Adv-addr=%s Data-addr=%s Remote-addr-list=%v MTU=%d",
|
|
|
|
listenAddr, bindAddr, advAddr, dataAddr, remoteAddrList, c.Config().Daemon.NetworkControlPlaneMTU)
|
2016-11-22 02:38:03 -05:00
|
|
|
if advAddr != "" && agent == nil {
|
2017-04-12 21:51:01 -04:00
|
|
|
if err := c.agentInit(listenAddr, bindAddr, advAddr, dataAddr); err != nil {
|
2017-05-03 14:18:33 -04:00
|
|
|
logrus.Errorf("error in agentInit: %v", err)
|
|
|
|
return err
|
2016-06-05 01:48:10 -04:00
|
|
|
}
|
2017-05-03 14:18:33 -04:00
|
|
|
c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
|
2017-04-07 16:31:44 -04:00
|
|
|
if capability.ConnectivityScope == datastore.GlobalScope {
|
2017-05-03 14:18:33 -04:00
|
|
|
c.agentDriverNotify(driver)
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
})
|
2016-06-05 01:48:10 -04:00
|
|
|
}
|
2016-08-19 20:57:58 -04:00
|
|
|
|
2017-04-27 19:58:42 -04:00
|
|
|
if len(remoteAddrList) > 0 {
|
|
|
|
if err := c.agentJoin(remoteAddrList); err != nil {
|
2016-09-27 03:12:07 -04:00
|
|
|
logrus.Errorf("Error in joining gossip cluster : %v(join will be retried in background)", err)
|
2016-06-05 01:48:10 -04:00
|
|
|
}
|
|
|
|
}
|
2016-08-19 20:57:58 -04:00
|
|
|
|
2016-06-05 01:48:10 -04:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-06-11 07:50:25 -04:00
|
|
|
// For a given subsystem getKeys sorts the keys by lamport time and returns
|
|
|
|
// slice of keys and lamport time which can used as a unique tag for the keys
|
|
|
|
func (c *controller) getKeys(subsys string) ([][]byte, []uint64) {
|
2016-11-22 02:38:03 -05:00
|
|
|
c.Lock()
|
|
|
|
defer c.Unlock()
|
|
|
|
|
2016-06-11 07:50:25 -04:00
|
|
|
sort.Sort(ByTime(c.keys))
|
|
|
|
|
|
|
|
keys := [][]byte{}
|
|
|
|
tags := []uint64{}
|
|
|
|
for _, key := range c.keys {
|
|
|
|
if key.Subsystem == subsys {
|
|
|
|
keys = append(keys, key.Key)
|
|
|
|
tags = append(tags, key.LamportTime)
|
|
|
|
}
|
2016-03-30 17:42:58 -04:00
|
|
|
}
|
|
|
|
|
2016-06-11 07:50:25 -04:00
|
|
|
keys[0], keys[1] = keys[1], keys[0]
|
|
|
|
tags[0], tags[1] = tags[1], tags[0]
|
|
|
|
return keys, tags
|
|
|
|
}
|
2016-06-06 21:17:10 -04:00
|
|
|
|
2016-08-02 21:34:10 -04:00
|
|
|
// getPrimaryKeyTag returns the primary key for a given subsystem from the
|
2016-06-11 07:50:25 -04:00
|
|
|
// list of sorted key and the associated tag
|
2016-08-02 21:34:10 -04:00
|
|
|
func (c *controller) getPrimaryKeyTag(subsys string) ([]byte, uint64, error) {
|
2016-11-22 02:38:03 -05:00
|
|
|
c.Lock()
|
|
|
|
defer c.Unlock()
|
2016-06-05 01:48:10 -04:00
|
|
|
sort.Sort(ByTime(c.keys))
|
2016-06-11 07:50:25 -04:00
|
|
|
keys := []*types.EncryptionKey{}
|
2016-06-05 01:48:10 -04:00
|
|
|
for _, key := range c.keys {
|
2016-06-11 07:50:25 -04:00
|
|
|
if key.Subsystem == subsys {
|
|
|
|
keys = append(keys, key)
|
2016-06-06 21:17:10 -04:00
|
|
|
}
|
2016-06-04 05:10:19 -04:00
|
|
|
}
|
2016-08-02 21:34:10 -04:00
|
|
|
return keys[1].Key, keys[1].LamportTime, nil
|
2016-06-11 07:50:25 -04:00
|
|
|
}
|
|
|
|
|
2017-04-12 21:51:01 -04:00
|
|
|
// agentInit creates the networkdb instance for this node, installs the
// controller's agent, starts the endpoint/node table event handlers,
// pushes IPSec keys to all drivers, and joins every existing network to
// the cluster.
func (c *controller) agentInit(listenAddr, bindAddrOrInterface, advertiseAddr, dataPathAddr string) error {
	bindAddr, err := resolveAddr(bindAddrOrInterface)
	if err != nil {
		return err
	}

	keys, _ := c.getKeys(subsysGossip)

	netDBConf := networkdb.DefaultConfig()
	netDBConf.BindAddr = listenAddr
	netDBConf.AdvertiseAddr = advertiseAddr
	netDBConf.Keys = keys
	if c.Config().Daemon.NetworkControlPlaneMTU != 0 {
		// Consider the MTU remove the IP hdr (IPv4 or IPv6) and the TCP/UDP hdr.
		// To be on the safe side let's cut 100 bytes
		netDBConf.PacketBufferSize = (c.Config().Daemon.NetworkControlPlaneMTU - 100)
		logrus.Debugf("Control plane MTU: %d will initialize NetworkDB with: %d",
			c.Config().Daemon.NetworkControlPlaneMTU, netDBConf.PacketBufferSize)
	}
	nDB, err := networkdb.New(netDBConf)
	if err != nil {
		return err
	}

	// Register the diagnostic handlers
	c.DiagnosticServer.RegisterHandler(nDB, networkdb.NetDbPaths2Func)

	// Watch the libnetwork endpoint table and the node table; the cancel
	// functions are stashed on the agent for agentClose.
	var cancelList []func()
	ch, cancel := nDB.Watch(libnetworkEPTable, "", "")
	cancelList = append(cancelList, cancel)
	nodeCh, cancel := nDB.Watch(networkdb.NodeTable, "", "")
	cancelList = append(cancelList, cancel)

	c.Lock()
	c.agent = &agent{
		networkDB:         nDB,
		bindAddr:          bindAddr,
		advertiseAddr:     advertiseAddr,
		dataPathAddr:      dataPathAddr,
		coreCancelFuncs:   cancelList,
		driverCancelFuncs: make(map[string][]func()),
	}
	c.Unlock()

	// Event handlers run for the lifetime of the watches; they stop when
	// the watch channels are cancelled/closed by agentClose.
	go c.handleTableEvents(ch, c.handleEpTableEvent)
	go c.handleTableEvents(nodeCh, c.handleNodeTableEvent)

	// Push the full IPSec key configuration to every driver.
	drvEnc := discoverapi.DriverEncryptionConfig{}
	keys, tags := c.getKeys(subsysIPSec)
	drvEnc.Keys = keys
	drvEnc.Tags = tags

	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
		err := driver.DiscoverNew(discoverapi.EncryptionKeysConfig, drvEnc)
		if err != nil {
			logrus.Warnf("Failed to set datapath keys in driver %s: %v", name, err)
		}
		return false
	})

	c.WalkNetworks(joinCluster)

	return nil
}
|
|
|
|
|
2017-04-27 19:58:42 -04:00
|
|
|
func (c *controller) agentJoin(remoteAddrList []string) error {
|
2016-11-22 02:38:03 -05:00
|
|
|
agent := c.getAgent()
|
|
|
|
if agent == nil {
|
2016-03-30 17:42:58 -04:00
|
|
|
return nil
|
|
|
|
}
|
2017-04-27 19:58:42 -04:00
|
|
|
return agent.networkDB.Join(remoteAddrList)
|
2016-03-30 17:42:58 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c *controller) agentDriverNotify(d driverapi.Driver) {
|
2016-11-22 02:38:03 -05:00
|
|
|
agent := c.getAgent()
|
|
|
|
if agent == nil {
|
2016-03-30 17:42:58 -04:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2017-04-12 21:51:01 -04:00
|
|
|
if err := d.DiscoverNew(discoverapi.NodeDiscovery, discoverapi.NodeDiscoveryData{
|
|
|
|
Address: agent.dataPathAddress(),
|
2016-11-22 02:38:03 -05:00
|
|
|
BindAddress: agent.bindAddr,
|
2016-07-19 21:17:30 -04:00
|
|
|
Self: true,
|
2017-04-12 21:51:01 -04:00
|
|
|
}); err != nil {
|
|
|
|
logrus.Warnf("Failed the node discovery in driver: %v", err)
|
|
|
|
}
|
2016-06-06 21:17:10 -04:00
|
|
|
|
|
|
|
drvEnc := discoverapi.DriverEncryptionConfig{}
|
2016-06-15 07:10:23 -04:00
|
|
|
keys, tags := c.getKeys(subsysIPSec)
|
2016-06-11 07:50:25 -04:00
|
|
|
drvEnc.Keys = keys
|
|
|
|
drvEnc.Tags = tags
|
|
|
|
|
2017-04-12 21:51:01 -04:00
|
|
|
if err := d.DiscoverNew(discoverapi.EncryptionKeysConfig, drvEnc); err != nil {
|
|
|
|
logrus.Warnf("Failed to set datapath keys in driver: %v", err)
|
|
|
|
}
|
2016-03-30 17:42:58 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// agentClose detaches the agent from the controller, cancels every table
// watch (driver watches plus the core networkdb watches), and shuts down
// the networkdb instance. Safe to call when no agent is running.
func (c *controller) agentClose() {
	// Acquire current agent instance and reset its pointer
	// then run closing functions
	c.Lock()
	agent := c.agent
	c.agent = nil
	c.Unlock()

	// when the agent is closed the cluster provider should be cleaned up
	c.SetClusterProvider(nil)

	if agent == nil {
		return
	}

	var cancelList []func()

	// Collect the cancel functions under the agent lock...
	agent.Lock()
	for _, cancelFuncs := range agent.driverCancelFuncs {
		cancelList = append(cancelList, cancelFuncs...)
	}

	// Add also the cancel functions for the network db
	cancelList = append(cancelList, agent.coreCancelFuncs...)
	agent.Unlock()

	// ...but invoke them outside it.
	for _, cancel := range cancelList {
		cancel()
	}

	agent.networkDB.Close()
}
|
|
|
|
|
2017-03-02 02:57:37 -05:00
|
|
|
// Task has the backend container details
type Task struct {
	// Name is the name published in the endpoint record.
	Name string
	// EndpointID is the libnetwork endpoint ID backing this task.
	EndpointID string
	// EndpointIP is the IP address of the backend endpoint.
	EndpointIP string
	// Info carries driver-decoded, driver-specific endpoint info
	// (see network.Services).
	Info map[string]string
}
|
|
|
|
|
|
|
|
// ServiceInfo has service specific details along with the list of backend tasks
type ServiceInfo struct {
	// VIP is the virtual IP of the service.
	VIP string
	// LocalLBIndex is the local load-balancer index for the service.
	LocalLBIndex int
	// Tasks lists the backend tasks of the service.
	Tasks []Task
	// Ports lists the ingress ports as "Target: X, Publish: Y" strings.
	Ports []string
}
|
|
|
|
|
|
|
|
// epRecord pairs a gossiped EndpointRecord with its driver-decoded info
// and local load-balancer index while building the Services() result.
type epRecord struct {
	ep      EndpointRecord
	info    map[string]string
	lbIndex int
}
|
|
|
|
|
|
|
|
// Services returns the services visible on this network, keyed by service
// name, by merging three sources: the driver-agnostic endpoint records in
// libnetworkEPTable, the driver-decoded entries from the driver's own
// endpoint tables, and the locally computed load-balancer indices.
// Returns nil when the network is not cluster-eligible or no agent runs.
func (n *network) Services() map[string]ServiceInfo {
	eps := make(map[string]epRecord)

	if !n.isClusterEligible() {
		return nil
	}
	agent := n.getController().getAgent()
	if agent == nil {
		return nil
	}

	// Walk through libnetworkEPTable and fetch the driver agnostic endpoint info
	entries := agent.networkDB.GetTableByNetwork(libnetworkEPTable, n.id)
	for eid, value := range entries {
		var epRec EndpointRecord
		nid := n.ID()
		if err := proto.Unmarshal(value.Value, &epRec); err != nil {
			logrus.Errorf("Unmarshal of libnetworkEPTable failed for endpoint %s in network %s, %v", eid, nid, err)
			continue
		}
		i := n.getController().getLBIndex(epRec.ServiceID, nid, epRec.IngressPorts)
		eps[eid] = epRecord{
			ep:      epRec,
			lbIndex: i,
		}
	}

	// Walk through the driver's tables, have the driver decode the entries
	// and return the tuple {ep ID, value}. value is a string that coveys
	// relevant info about the endpoint.
	d, err := n.driver(true)
	if err != nil {
		logrus.Errorf("Could not resolve driver for network %s/%s while fetching services: %v", n.networkType, n.ID(), err)
		return nil
	}
	for _, table := range n.driverTables {
		if table.objType != driverapi.EndpointObject {
			continue
		}
		entries := agent.networkDB.GetTableByNetwork(table.name, n.id)
		for key, value := range entries {
			epID, info := d.DecodeTableEntry(table.name, key, value.Value)
			// A driver entry without a matching libnetwork endpoint record
			// indicates driver/libnetwork state divergence.
			if ep, ok := eps[epID]; !ok {
				logrus.Errorf("Inconsistent driver and libnetwork state for endpoint %s", epID)
			} else {
				ep.info = info
				eps[epID] = ep
			}
		}
	}

	// group the endpoints into a map keyed by the service name
	sinfo := make(map[string]ServiceInfo)
	for ep, epr := range eps {
		var (
			s  ServiceInfo
			ok bool
		)
		if s, ok = sinfo[epr.ep.ServiceName]; !ok {
			s = ServiceInfo{
				VIP:          epr.ep.VirtualIP,
				LocalLBIndex: epr.lbIndex,
			}
		}
		ports := []string{}
		// Ports are identical for every task of a service, so compute
		// them only once per service.
		if s.Ports == nil {
			for _, port := range epr.ep.IngressPorts {
				p := fmt.Sprintf("Target: %d, Publish: %d", port.TargetPort, port.PublishedPort)
				ports = append(ports, p)
			}
			s.Ports = ports
		}
		s.Tasks = append(s.Tasks, Task{
			Name:       epr.ep.Name,
			EndpointID: ep,
			EndpointIP: epr.ep.EndpointIP,
			Info:       epr.info,
		})
		sinfo[epr.ep.ServiceName] = s
	}
	return sinfo
}
|
|
|
|
|
2016-03-30 17:42:58 -04:00
|
|
|
func (n *network) isClusterEligible() bool {
|
2017-04-07 16:31:44 -04:00
|
|
|
if n.scope != datastore.SwarmScope || !n.driverIsMultihost() {
|
2016-03-30 17:42:58 -04:00
|
|
|
return false
|
|
|
|
}
|
2016-11-22 02:38:03 -05:00
|
|
|
return n.getController().getAgent() != nil
|
2016-03-30 17:42:58 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
func (n *network) joinCluster() error {
|
|
|
|
if !n.isClusterEligible() {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-11-22 02:38:03 -05:00
|
|
|
agent := n.getController().getAgent()
|
|
|
|
if agent == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return agent.networkDB.JoinNetwork(n.ID())
|
2016-03-30 17:42:58 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
func (n *network) leaveCluster() error {
|
|
|
|
if !n.isClusterEligible() {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-11-22 02:38:03 -05:00
|
|
|
agent := n.getController().getAgent()
|
|
|
|
if agent == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return agent.networkDB.LeaveNetwork(n.ID())
|
2016-03-30 17:42:58 -04:00
|
|
|
}
|
|
|
|
|
2016-11-11 03:42:34 -05:00
|
|
|
func (ep *endpoint) addDriverInfoToCluster() error {
|
|
|
|
n := ep.getNetwork()
|
|
|
|
if !n.isClusterEligible() {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if ep.joinInfo == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-11-22 02:38:03 -05:00
|
|
|
agent := n.getController().getAgent()
|
2016-11-11 03:42:34 -05:00
|
|
|
if agent == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, te := range ep.joinInfo.driverTableEntries {
|
|
|
|
if err := agent.networkDB.CreateEntry(te.tableName, n.ID(), te.key, te.value); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (ep *endpoint) deleteDriverInfoFromCluster() error {
|
|
|
|
n := ep.getNetwork()
|
|
|
|
if !n.isClusterEligible() {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if ep.joinInfo == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-11-22 02:38:03 -05:00
|
|
|
agent := n.getController().getAgent()
|
2016-11-11 03:42:34 -05:00
|
|
|
if agent == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, te := range ep.joinInfo.driverTableEntries {
|
|
|
|
if err := agent.networkDB.DeleteEntry(te.tableName, n.ID(), te.key); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-06-06 19:04:50 -04:00
|
|
|
// addServiceInfoToCluster registers the endpoint for service discovery
// (service binding for swarm tasks, or plain container-name resolution for
// attachable networks) and publishes its EndpointRecord to networkdb.
// Runs under the sandbox's Service lock to serialize with
// deleteServiceInfoFromCluster (see race note below).
func (ep *endpoint) addServiceInfoToCluster(sb *sandbox) error {
	// Nothing to publish for an anonymous endpoint without aliases or an
	// endpoint whose interface/address is not yet set.
	if ep.isAnonymous() && len(ep.myAliases) == 0 || ep.Iface() == nil || ep.Iface().Address() == nil {
		return nil
	}

	n := ep.getNetwork()
	if !n.isClusterEligible() {
		return nil
	}

	sb.Service.Lock()
	defer sb.Service.Unlock()
	logrus.Debugf("addServiceInfoToCluster START for %s %s", ep.svcName, ep.ID())

	// Check that the endpoint is still present on the sandbox before adding it to the service discovery.
	// This is to handle a race between the EnableService and the sbLeave
	// It is possible that the EnableService starts, fetches the list of the endpoints and
	// by the time the addServiceInfoToCluster is called the endpoint got removed from the sandbox
	// The risk is that the deleteServiceInfoToCluster happens before the addServiceInfoToCluster.
	// This check under the Service lock of the sandbox ensure the correct behavior.
	// If the addServiceInfoToCluster arrives first may find or not the endpoint and will proceed or exit
	// but in any case the deleteServiceInfoToCluster will follow doing the cleanup if needed.
	// In case the deleteServiceInfoToCluster arrives first, this one is happening after the endpoint is
	// removed from the list, in this situation the delete will bail out not finding any data to cleanup
	// and the add will bail out not finding the endpoint on the sandbox.
	if e := sb.getEndpoint(ep.ID()); e == nil {
		logrus.Warnf("addServiceInfoToCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID())
		return nil
	}

	c := n.getController()
	agent := c.getAgent()

	// Anonymous endpoints are published under their first alias.
	name := ep.Name()
	if ep.isAnonymous() {
		name = ep.MyAliases()[0]
	}

	var ingressPorts []*PortConfig
	if ep.svcID != "" {
		// This is a task part of a service
		// Gossip ingress ports only in ingress network.
		if n.ingress {
			ingressPorts = ep.ingressPorts
		}
		if err := c.addServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), name, ep.virtualIP, ingressPorts, ep.svcAliases, ep.myAliases, ep.Iface().Address().IP, "addServiceInfoToCluster"); err != nil {
			return err
		}
	} else {
		// This is a container simply attached to an attachable network
		if err := c.addContainerNameResolution(n.ID(), ep.ID(), name, ep.myAliases, ep.Iface().Address().IP, "addServiceInfoToCluster"); err != nil {
			return err
		}
	}

	// Serialize the endpoint record and gossip it to the cluster.
	buf, err := proto.Marshal(&EndpointRecord{
		Name:            name,
		ServiceName:     ep.svcName,
		ServiceID:       ep.svcID,
		VirtualIP:       ep.virtualIP.String(),
		IngressPorts:    ingressPorts,
		Aliases:         ep.svcAliases,
		TaskAliases:     ep.myAliases,
		EndpointIP:      ep.Iface().Address().IP.String(),
		ServiceDisabled: false,
	})
	if err != nil {
		return err
	}

	if agent != nil {
		if err := agent.networkDB.CreateEntry(libnetworkEPTable, n.ID(), ep.ID(), buf); err != nil {
			logrus.Warnf("addServiceInfoToCluster NetworkDB CreateEntry failed for %s %s err:%s", ep.id, n.id, err)
			return err
		}
	}

	logrus.Debugf("addServiceInfoToCluster END for %s %s", ep.svcName, ep.ID())

	return nil
}
|
|
|
|
|
Gracefully remove LB endpoints from services
This patch attempts to allow endpoints to complete servicing connections
while being removed from a service. The change adds a flag to the
endpoint.deleteServiceInfoFromCluster() method to indicate whether this
removal should fully remove connectivity through the load balancer
to the endpoint or should just disable directing further connections to
the endpoint. If the flag is 'false', then the load balancer assigns
a weight of 0 to the endpoint but does not remove it as a linux load
balancing destination. It does remove the endpoint as a docker load
balancing endpoint but tracks it in a special map of "disabled-but-not-
destroyed" load balancing endpoints. This allows traffic to continue
flowing, at least under Linux. If the flag is 'true', then the code
removes the endpoint entirely as a load balancing destination.
The sandbox.DisableService() method invokes deleteServiceInfoFromCluster()
with the flag sent to 'false', while the endpoint.sbLeave() method invokes
it with the flag set to 'true' to complete the removal on endpoint
finalization. Renaming the endpoint invokes deleteServiceInfoFromCluster()
with the flag set to 'true' because renaming attempts to completely
remove and then re-add each endpoint service entry.
The controller.rmServiceBinding() method, which carries out the operation,
similarly gets a new flag for whether to fully remove the endpoint. If
the flag is false, it does the job of moving the endpoint from the
load balancing set to the 'disabled' set. It then removes or
de-weights the entry in the OS load balancing table via
network.rmLBBackend(). It removes the service entirely via said method
ONLY IF there are no more live or disabled load balancing endpoints.
Similarly network.addLBBackend() requires slight tweaking to properly
manage the disabled set.
Finally, this change requires propagating the status of disabled
service endpoints via the networkDB. Accordingly, the patch includes
both code to generate and handle service update messages. It also
augments the service structure with a ServiceDisabled boolean to convey
whether an endpoint should ultimately be removed or just disabled.
This, naturally, required a rebuild of the protocol buffer code as well.
Signed-off-by: Chris Telfer <ctelfer@docker.com>
2018-02-14 17:04:23 -05:00
|
|
|
// deleteServiceInfoFromCluster removes or disables this endpoint's service
// records across the cluster. If fullRemove is true the endpoint's entry is
// deleted from networkDB outright; otherwise it is only marked as disabled
// (via disableServiceInNetworkDB) so in-flight connections may drain.
// Afterwards the local service binding (or plain container name resolution)
// is torn down. method identifies the caller and is used only for logging.
func (ep *endpoint) deleteServiceInfoFromCluster(sb *sandbox, fullRemove bool, method string) error {
	// Anonymous endpoints with no aliases have no service records to clean up.
	if ep.isAnonymous() && len(ep.myAliases) == 0 {
		return nil
	}

	n := ep.getNetwork()
	if !n.isClusterEligible() {
		return nil
	}

	// Serialize service mutations on this sandbox for the whole operation.
	sb.Service.Lock()
	defer sb.Service.Unlock()
	logrus.Debugf("deleteServiceInfoFromCluster from %s START for %s %s", method, ep.svcName, ep.ID())

	// Avoid a race w/ with a container that aborts preemptively. This would
	// get caught in disableServceInNetworkDB, but we check here to make the
	// nature of the condition more clear.
	// See comment in addServiceInfoToCluster()
	if e := sb.getEndpoint(ep.ID()); e == nil {
		logrus.Warnf("deleteServiceInfoFromCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID())
		return nil
	}

	c := n.getController()
	agent := c.getAgent()

	// The published name is the endpoint name, or its first alias for
	// anonymous endpoints.
	name := ep.Name()
	if ep.isAnonymous() {
		name = ep.MyAliases()[0]
	}

	if agent != nil {
		// First update the networkDB then locally
		if fullRemove {
			if err := agent.networkDB.DeleteEntry(libnetworkEPTable, n.ID(), ep.ID()); err != nil {
				// Best-effort: log and continue with local cleanup.
				logrus.Warnf("deleteServiceInfoFromCluster NetworkDB DeleteEntry failed for %s %s err:%s", ep.id, n.id, err)
			}
		} else {
			disableServiceInNetworkDB(agent, n, ep)
		}
	}

	if ep.Iface() != nil && ep.Iface().Address() != nil {
		if ep.svcID != "" {
			// This is a task part of a service
			var ingressPorts []*PortConfig
			if n.ingress {
				ingressPorts = ep.ingressPorts
			}
			// deleteSvcRecords is always true here; fullRemove controls
			// whether the LB backend is removed or merely disabled.
			if err := c.rmServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), name, ep.virtualIP, ingressPorts, ep.svcAliases, ep.myAliases, ep.Iface().Address().IP, "deleteServiceInfoFromCluster", true, fullRemove); err != nil {
				return err
			}
		} else {
			// This is a container simply attached to an attachable network
			if err := c.delContainerNameResolution(n.ID(), ep.ID(), name, ep.myAliases, ep.Iface().Address().IP, "deleteServiceInfoFromCluster"); err != nil {
				return err
			}
		}
	}

	logrus.Debugf("deleteServiceInfoFromCluster from %s END for %s %s", method, ep.svcName, ep.ID())

	return nil
}
|
|
|
|
|
Gracefully remove LB endpoints from services
This patch attempts to allow endpoints to complete servicing connections
while being removed from a service. The change adds a flag to the
endpoint.deleteServiceInfoFromCluster() method to indicate whether this
removal should fully remove connectivity through the load balancer
to the endpoint or should just disable directing further connections to
the endpoint. If the flag is 'false', then the load balancer assigns
a weight of 0 to the endpoint but does not remove it as a linux load
balancing destination. It does remove the endpoint as a docker load
balancing endpoint but tracks it in a special map of "disabled-but-not-
destroyed" load balancing endpoints. This allows traffic to continue
flowing, at least under Linux. If the flag is 'true', then the code
removes the endpoint entirely as a load balancing destination.
The sandbox.DisableService() method invokes deleteServiceInfoFromCluster()
with the flag sent to 'false', while the endpoint.sbLeave() method invokes
it with the flag set to 'true' to complete the removal on endpoint
finalization. Renaming the endpoint invokes deleteServiceInfoFromCluster()
with the flag set to 'true' because renaming attempts to completely
remove and then re-add each endpoint service entry.
The controller.rmServiceBinding() method, which carries out the operation,
similarly gets a new flag for whether to fully remove the endpoint. If
the flag is false, it does the job of moving the endpoint from the
load balancing set to the 'disabled' set. It then removes or
de-weights the entry in the OS load balancing table via
network.rmLBBackend(). It removes the service entirely via said method
ONLY IF there are no more live or disabled load balancing endpoints.
Similarly network.addLBBackend() requires slight tweaking to properly
manage the disabled set.
Finally, this change requires propagating the status of disabled
service endpoints via the networkDB. Accordingly, the patch includes
both code to generate and handle service update messages. It also
augments the service structure with a ServiceDisabled boolean to convey
whether an endpoint should ultimately be removed or just disabled.
This, naturally, required a rebuild of the protocol buffer code as well.
Signed-off-by: Chris Telfer <ctelfer@docker.com>
2018-02-14 17:04:23 -05:00
|
|
|
func disableServiceInNetworkDB(a *agent, n *network, ep *endpoint) {
|
|
|
|
var epRec EndpointRecord
|
|
|
|
|
|
|
|
logrus.Debugf("disableServiceInNetworkDB for %s %s", ep.svcName, ep.ID())
|
|
|
|
|
|
|
|
// Update existing record to indicate that the service is disabled
|
|
|
|
inBuf, err := a.networkDB.GetEntry(libnetworkEPTable, n.ID(), ep.ID())
|
|
|
|
if err != nil {
|
|
|
|
logrus.Warnf("disableServiceInNetworkDB GetEntry failed for %s %s err:%s", ep.id, n.id, err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
// Should never fail
|
|
|
|
if err := proto.Unmarshal(inBuf, &epRec); err != nil {
|
|
|
|
logrus.Errorf("disableServiceInNetworkDB unmarshal failed for %s %s err:%s", ep.id, n.id, err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
epRec.ServiceDisabled = true
|
|
|
|
// Should never fail
|
|
|
|
outBuf, err := proto.Marshal(&epRec)
|
|
|
|
if err != nil {
|
|
|
|
logrus.Errorf("disableServiceInNetworkDB marshalling failed for %s %s err:%s", ep.id, n.id, err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
// Send update to the whole cluster
|
|
|
|
if err := a.networkDB.UpdateEntry(libnetworkEPTable, n.ID(), ep.ID(), outBuf); err != nil {
|
|
|
|
logrus.Warnf("disableServiceInNetworkDB UpdateEntry failed for %s %s err:%s", ep.id, n.id, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-30 17:42:58 -04:00
|
|
|
func (n *network) addDriverWatches() {
|
|
|
|
if !n.isClusterEligible() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
c := n.getController()
|
2016-11-22 02:38:03 -05:00
|
|
|
agent := c.getAgent()
|
|
|
|
if agent == nil {
|
|
|
|
return
|
|
|
|
}
|
2017-03-02 02:57:37 -05:00
|
|
|
for _, table := range n.driverTables {
|
|
|
|
ch, cancel := agent.networkDB.Watch(table.name, n.ID(), "")
|
2016-11-22 02:38:03 -05:00
|
|
|
agent.Lock()
|
|
|
|
agent.driverCancelFuncs[n.ID()] = append(agent.driverCancelFuncs[n.ID()], cancel)
|
|
|
|
agent.Unlock()
|
2016-03-30 17:42:58 -04:00
|
|
|
go c.handleTableEvents(ch, n.handleDriverTableEvent)
|
|
|
|
d, err := n.driver(false)
|
|
|
|
if err != nil {
|
|
|
|
logrus.Errorf("Could not resolve driver %s while walking driver tabl: %v", n.networkType, err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2017-08-05 11:42:20 -04:00
|
|
|
agent.networkDB.WalkTable(table.name, func(nid, key string, value []byte, deleted bool) bool {
|
|
|
|
// skip the entries that are mark for deletion, this is safe because this function is
|
|
|
|
// called at initialization time so there is no state to delete
|
|
|
|
if nid == n.ID() && !deleted {
|
2017-03-02 02:57:37 -05:00
|
|
|
d.EventNotify(driverapi.Create, nid, table.name, key, value)
|
2016-08-08 14:55:06 -04:00
|
|
|
}
|
2016-03-30 17:42:58 -04:00
|
|
|
return false
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *network) cancelDriverWatches() {
|
|
|
|
if !n.isClusterEligible() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2016-11-22 02:38:03 -05:00
|
|
|
agent := n.getController().getAgent()
|
|
|
|
if agent == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
agent.Lock()
|
|
|
|
cancelFuncs := agent.driverCancelFuncs[n.ID()]
|
|
|
|
delete(agent.driverCancelFuncs, n.ID())
|
|
|
|
agent.Unlock()
|
2016-03-30 17:42:58 -04:00
|
|
|
|
|
|
|
for _, cancel := range cancelFuncs {
|
|
|
|
cancel()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-31 12:49:55 -04:00
|
|
|
func (c *controller) handleTableEvents(ch *events.Channel, fn func(events.Event)) {
|
2016-03-30 17:42:58 -04:00
|
|
|
for {
|
|
|
|
select {
|
2017-05-31 12:49:55 -04:00
|
|
|
case ev := <-ch.C:
|
2016-03-30 17:42:58 -04:00
|
|
|
fn(ev)
|
2017-05-31 12:49:55 -04:00
|
|
|
case <-ch.Done():
|
|
|
|
return
|
2016-03-30 17:42:58 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *network) handleDriverTableEvent(ev events.Event) {
|
|
|
|
d, err := n.driver(false)
|
|
|
|
if err != nil {
|
|
|
|
logrus.Errorf("Could not resolve driver %s while handling driver table event: %v", n.networkType, err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
var (
|
|
|
|
etype driverapi.EventType
|
|
|
|
tname string
|
|
|
|
key string
|
|
|
|
value []byte
|
|
|
|
)
|
|
|
|
|
|
|
|
switch event := ev.(type) {
|
|
|
|
case networkdb.CreateEvent:
|
|
|
|
tname = event.Table
|
|
|
|
key = event.Key
|
|
|
|
value = event.Value
|
|
|
|
etype = driverapi.Create
|
|
|
|
case networkdb.DeleteEvent:
|
|
|
|
tname = event.Table
|
|
|
|
key = event.Key
|
|
|
|
value = event.Value
|
|
|
|
etype = driverapi.Delete
|
|
|
|
case networkdb.UpdateEvent:
|
|
|
|
tname = event.Table
|
|
|
|
key = event.Key
|
|
|
|
value = event.Value
|
|
|
|
etype = driverapi.Delete
|
|
|
|
}
|
|
|
|
|
|
|
|
d.EventNotify(etype, n.ID(), tname, key, value)
|
|
|
|
}
|
|
|
|
|
2017-01-31 12:13:08 -05:00
|
|
|
func (c *controller) handleNodeTableEvent(ev events.Event) {
|
|
|
|
var (
|
|
|
|
value []byte
|
|
|
|
isAdd bool
|
|
|
|
nodeAddr networkdb.NodeAddr
|
|
|
|
)
|
|
|
|
switch event := ev.(type) {
|
|
|
|
case networkdb.CreateEvent:
|
|
|
|
value = event.Value
|
|
|
|
isAdd = true
|
|
|
|
case networkdb.DeleteEvent:
|
|
|
|
value = event.Value
|
|
|
|
case networkdb.UpdateEvent:
|
|
|
|
logrus.Errorf("Unexpected update node table event = %#v", event)
|
|
|
|
}
|
|
|
|
|
|
|
|
err := json.Unmarshal(value, &nodeAddr)
|
|
|
|
if err != nil {
|
|
|
|
logrus.Errorf("Error unmarshalling node table event %v", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
c.processNodeDiscovery([]net.IP{nodeAddr.Addr}, isAdd)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2016-03-30 17:42:58 -04:00
|
|
|
// handleEpTableEvent processes endpoint service-table events from the
// cluster: Create events add a remote service binding (or container name
// resolution for plain attachable-network containers), Delete events fully
// remove it, and Update events disable the binding while keeping it
// tracked so existing connections can drain.
func (c *controller) handleEpTableEvent(ev events.Event) {
	var (
		nid   string
		eid   string
		value []byte
		epRec EndpointRecord
	)

	// Extract the network ID, endpoint ID, and serialized record common to
	// all three event kinds.
	switch event := ev.(type) {
	case networkdb.CreateEvent:
		nid = event.NetworkID
		eid = event.Key
		value = event.Value
	case networkdb.DeleteEvent:
		nid = event.NetworkID
		eid = event.Key
		value = event.Value
	case networkdb.UpdateEvent:
		nid = event.NetworkID
		eid = event.Key
		value = event.Value
	default:
		logrus.Errorf("Unexpected update service table event = %#v", event)
		return
	}

	err := proto.Unmarshal(value, &epRec)
	if err != nil {
		logrus.Errorf("Failed to unmarshal service table value: %v", err)
		return
	}

	containerName := epRec.Name
	svcName := epRec.ServiceName
	svcID := epRec.ServiceID
	vip := net.ParseIP(epRec.VirtualIP)
	ip := net.ParseIP(epRec.EndpointIP)
	ingressPorts := epRec.IngressPorts
	serviceAliases := epRec.Aliases
	taskAliases := epRec.TaskAliases

	// A record without a name or a parseable endpoint IP is unusable.
	if containerName == "" || ip == nil {
		logrus.Errorf("Invalid endpoint name/ip received while handling service table event %s", value)
		return
	}

	// A non-empty svcID means the endpoint is a task in a service; an empty
	// one means a container simply attached to an attachable network.
	switch ev.(type) {
	case networkdb.CreateEvent:
		logrus.Debugf("handleEpTableEvent ADD %s R:%v", eid, epRec)
		if svcID != "" {
			// This is a remote task part of a service
			if err := c.addServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent"); err != nil {
				logrus.Errorf("failed adding service binding for %s epRec:%v err:%v", eid, epRec, err)
				return
			}
		} else {
			// This is a remote container simply attached to an attachable network
			if err := c.addContainerNameResolution(nid, eid, containerName, taskAliases, ip, "handleEpTableEvent"); err != nil {
				logrus.Errorf("failed adding container name resolution for %s epRec:%v err:%v", eid, epRec, err)
			}
		}

	case networkdb.DeleteEvent:
		logrus.Debugf("handleEpTableEvent DEL %s R:%v", eid, epRec)
		if svcID != "" {
			// This is a remote task part of a service
			// fullRemove=true: delete the backend entirely, not just disable it.
			if err := c.rmServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent", true, true); err != nil {
				logrus.Errorf("failed removing service binding for %s epRec:%v err:%v", eid, epRec, err)
				return
			}
		} else {
			// This is a remote container simply attached to an attachable network
			if err := c.delContainerNameResolution(nid, eid, containerName, taskAliases, ip, "handleEpTableEvent"); err != nil {
				logrus.Errorf("failed removing container name resolution for %s epRec:%v err:%v", eid, epRec, err)
			}
		}
	case networkdb.UpdateEvent:
		logrus.Debugf("handleEpTableEvent UPD %s R:%v", eid, epRec)
		// We currently should only get these to inform us that an endpoint
		// is disabled. Report if otherwise.
		if svcID == "" || !epRec.ServiceDisabled {
			logrus.Errorf("Unexpected update table event for %s epRec:%v", eid, epRec)
			return
		}
		// This is a remote task that is part of a service that is now disabled
		// fullRemove=false: de-weight the backend but keep it tracked.
		if err := c.rmServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent", true, false); err != nil {
			logrus.Errorf("failed disabling service binding for %s epRec:%v err:%v", eid, epRec, err)
			return
		}
	}
}
|