package overlay import ( "encoding/json" "fmt" "net" "sync" "syscall" "github.com/Sirupsen/logrus" "github.com/docker/libnetwork/datastore" "github.com/docker/libnetwork/driverapi" "github.com/docker/libnetwork/netutils" "github.com/docker/libnetwork/osl" "github.com/docker/libnetwork/types" "github.com/vishvananda/netlink" "github.com/vishvananda/netlink/nl" ) type networkTable map[string]*network type subnet struct { once *sync.Once vxlanName string brName string vni uint32 initErr error subnetIP *net.IPNet gwIP *net.IPNet } type network struct { id string dbIndex uint64 dbExists bool sbox osl.Sandbox endpoints endpointTable driver *driver joinCnt int once *sync.Once initEpoch int initErr error subnets []*subnet sync.Mutex } func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error { if id == "" { return fmt.Errorf("invalid network id") } // Since we perform lazy configuration make sure we try // configuring the driver when we enter CreateNetwork if err := d.configure(); err != nil { return err } n := &network{ id: id, driver: d, endpoints: endpointTable{}, once: &sync.Once{}, subnets: []*subnet{}, } for _, ipd := range ipV4Data { s := &subnet{ subnetIP: ipd.Pool, gwIP: ipd.Gateway, once: &sync.Once{}, } n.subnets = append(n.subnets, s) } if err := n.writeToStore(); err != nil { return fmt.Errorf("failed to update data store for network %v: %v", n.id, err) } d.addNetwork(n) return nil } /* func (d *driver) createNetworkfromStore(nid string) (*network, error) { n := &network{ id: nid, driver: d, endpoints: endpointTable{}, once: &sync.Once{}, subnets: []*subnet{}, } err := d.store.GetObject(datastore.Key(n.Key()...), n) if err != nil { return nil, fmt.Errorf("unable to get network %q from data store, %v", nid, err) } return n, nil }*/ func (d *driver) DeleteNetwork(nid string) error { if nid == "" { return fmt.Errorf("invalid network id") } n := d.network(nid) if n == nil { return fmt.Errorf("could not find network with id %s", nid) } d.deleteNetwork(nid) return n.releaseVxlanID() } func (n *network) joinSandbox() error { n.Lock() if n.joinCnt != 0 { n.joinCnt++ n.Unlock() return nil } n.Unlock() // If there is a race between two go routines here only one will win // the other will wait. n.once.Do(func() { // save the error status of initSandbox in n.initErr so that // all the racing go routines are able to know the status. n.initErr = n.initSandbox() }) return n.initErr } func (n *network) joinSubnetSandbox(s *subnet) error { s.once.Do(func() { s.initErr = n.initSubnetSandbox(s) }) // Increment joinCnt in all the goroutines only when the one time initSandbox // was a success. n.Lock() if s.initErr == nil { n.joinCnt++ } err := s.initErr n.Unlock() return err } func (n *network) leaveSandbox() { n.Lock() n.joinCnt-- if n.joinCnt != 0 { n.Unlock() return } // We are about to destroy sandbox since the container is leaving the network // Reinitialize the once variable so that we will be able to trigger one time // sandbox initialization(again) when another container joins subsequently. n.once = &sync.Once{} for _, s := range n.subnets { s.once = &sync.Once{} } n.Unlock() n.destroySandbox() } func (n *network) destroySandbox() { sbox := n.sandbox() if sbox != nil { for _, iface := range sbox.Info().Interfaces() { iface.Remove() } for _, s := range n.subnets { if s.vxlanName != "" { err := deleteVxlan(s.vxlanName) if err != nil { logrus.Warnf("could not cleanup sandbox properly: %v", err) } } } sbox.Destroy() } } func (n *network) initSubnetSandbox(s *subnet) error { // create a bridge and vxlan device for this subnet and move it to the sandbox brName, err := netutils.GenerateIfaceName("bridge", 7) if err != nil { return err } sbox := n.sandbox() if err := sbox.AddInterface(brName, "br", sbox.InterfaceOptions().Address(s.gwIP), sbox.InterfaceOptions().Bridge(true)); err != nil { return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.IP.String(), err) } vxlanName, err := createVxlan(n.vxlanID(s)) if err != nil { return err } if err := sbox.AddInterface(vxlanName, "vxlan", sbox.InterfaceOptions().Master(brName)); err != nil { return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.IP.String(), err) } n.Lock() s.vxlanName = vxlanName s.brName = brName n.Unlock() return nil } func (n *network) initSandbox() error { n.Lock() n.initEpoch++ n.Unlock() sbox, err := osl.NewSandbox( osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch)+n.id), true) if err != nil { return fmt.Errorf("could not create network sandbox: %v", err) } n.setSandbox(sbox) n.driver.peerDbUpdateSandbox(n.id) var nlSock *nl.NetlinkSocket sbox.InvokeFunc(func() { nlSock, err = nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH) if err != nil { err = fmt.Errorf("failed to subscribe to neighbor group netlink messages") } }) go n.watchMiss(nlSock) return nil } func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { for { msgs, err := nlSock.Receive() if err != nil { logrus.Errorf("Failed to receive from netlink: %v ", err) continue } for _, msg := range msgs { if msg.Header.Type != syscall.RTM_GETNEIGH && msg.Header.Type != syscall.RTM_NEWNEIGH { continue } neigh, err := netlink.NeighDeserialize(msg.Data) if err != nil { logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err) continue } if neigh.IP.To16() != nil { continue } if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 { continue } mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, neigh.IP) if err != nil { logrus.Errorf("could not resolve peer %q: %v", neigh.IP, err) continue } if err := n.driver.peerAdd(n.id, "dummy", neigh.IP, IPmask, mac, vtep, true); err != nil { logrus.Errorf("could not add neighbor entry for missed peer %q: %v", neigh.IP, err) } } } } func (d *driver) addNetwork(n *network) { d.Lock() d.networks[n.id] = n d.Unlock() } func (d *driver) deleteNetwork(nid string) { d.Lock() delete(d.networks, nid) d.Unlock() } func (d *driver) network(nid string) *network { d.Lock() networks := d.networks d.Unlock() n, ok := networks[nid] if !ok { n = d.getNetworkFromStore(nid) if n != nil { n.driver = d n.endpoints = endpointTable{} n.once = &sync.Once{} networks[nid] = n } } return n } func (d *driver) getNetworkFromStore(nid string) *network { if d.store == nil { return nil } n := &network{id: nid} if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil { return nil } return n } func (n *network) sandbox() osl.Sandbox { n.Lock() defer n.Unlock() return n.sbox } func (n *network) setSandbox(sbox osl.Sandbox) { n.Lock() n.sbox = sbox n.Unlock() } func (n *network) vxlanID(s *subnet) uint32 { n.Lock() defer n.Unlock() return s.vni } func (n *network) setVxlanID(s *subnet, vni uint32) { n.Lock() s.vni = vni n.Unlock() } func (n *network) Key() []string { return []string{"overlay", "network", n.id} } func (n *network) KeyPrefix() []string { return []string{"overlay", "network"} } func (n *network) Value() []byte { overlayNetmap := make(map[string]interface{}) s := n.subnets[0] if s == nil { logrus.Errorf("Network %s has no subnets", n.id) return []byte{} } overlayNetmap["subnetIP"] = s.subnetIP.String() overlayNetmap["gwIP"] = s.gwIP.String() overlayNetmap["vni"] = s.vni b, err := json.Marshal(overlayNetmap) if err != nil { return []byte{} } return b } func (n *network) Index() uint64 { return n.dbIndex } func (n *network) SetIndex(index uint64) { n.dbIndex = index n.dbExists = true } func (n *network) Exists() bool { return n.dbExists } func (n *network) Skip() bool { return false } func (n *network) SetValue(value []byte) error { var ( overlayNetmap map[string]interface{} err error ) err = json.Unmarshal(value, &overlayNetmap) if err != nil { return err } subnetIPstr := overlayNetmap["subnetIP"].(string) gwIPstr := overlayNetmap["gwIP"].(string) vni := uint32(overlayNetmap["vni"].(float64)) subnetIP, _ := types.ParseCIDR(subnetIPstr) gwIP, _ := types.ParseCIDR(gwIPstr) s := &subnet{ subnetIP: subnetIP, gwIP: gwIP, vni: vni, once: &sync.Once{}, } n.subnets = append(n.subnets, s) sNet := n.getMatchingSubnet(subnetIP) if sNet != nil { sNet.vni = vni } return nil } func (n *network) DataScope() string { return datastore.GlobalScope } func (n *network) writeToStore() error { return n.driver.store.PutObjectAtomic(n) } func (n *network) releaseVxlanID() error { if n.driver.store == nil { return fmt.Errorf("no datastore configured. cannot release vxlan id") } if len(n.subnets) == 0 { return nil } if err := n.driver.store.DeleteObjectAtomic(n); err != nil { if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound { // In both the above cases we can safely assume that the key has been removed by some other // instance and so simply get out of here return nil } return fmt.Errorf("failed to delete network to vxlan id map: %v", err) } for _, s := range n.subnets { n.driver.vxlanIdm.Release(n.vxlanID(s)) n.setVxlanID(s, 0) } return nil } func (n *network) obtainVxlanID(s *subnet) error { //return if the subnet already has a vxlan id assigned if s.vni != 0 { return nil } if n.driver.store == nil { return fmt.Errorf("no datastore configured. cannot obtain vxlan id") } for { if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil { return fmt.Errorf("getting network %q from datastore failed %v", n.id, err) } if s.vni == 0 { vxlanID, err := n.driver.vxlanIdm.GetID() if err != nil { return fmt.Errorf("failed to allocate vxlan id: %v", err) } n.setVxlanID(s, vxlanID) if err := n.writeToStore(); err != nil { n.driver.vxlanIdm.Release(n.vxlanID(s)) n.setVxlanID(s, 0) if err == datastore.ErrKeyModified { continue } return fmt.Errorf("network %q failed to update data store: %v", n.id, err) } return nil } return nil } } // getSubnetforIP returns the subnet to which the given IP belongs func (n *network) getSubnetforIP(ip *net.IPNet) *subnet { for _, s := range n.subnets { // first check if the mask lengths are the same i, _ := s.subnetIP.Mask.Size() j, _ := ip.Mask.Size() if i != j { continue } if s.subnetIP.Contains(ip.IP) { return s } } return nil } // getMatchingSubnet return the network's subnet that matches the input func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet { if ip == nil { return nil } for _, s := range n.subnets { // first check if the mask lengths are the same i, _ := s.subnetIP.Mask.Size() j, _ := ip.Mask.Size() if i != j { continue } if s.subnetIP.IP.Equal(ip.IP) { return s } } return nil }