From 7d466c6600fbe07636e763799d0d806ce35a90a2 Mon Sep 17 00:00:00 2001 From: Flavio Crisciani Date: Fri, 8 Sep 2017 14:48:03 -0700 Subject: [PATCH] Fix concurrent CreateNetwork in bridge driver The CreateNetwork in the bridge driver was not able to properly handle concurrent operations causing 2 issues: 1) crash from nil pointer exception 2) not proper handling of conflicting configuration This commit addresses the 2 previous mentioned issues and adds a test for it. The test with the original code has a low failure frequency to confirm the fix I had to add a time.Sleep in the body of the CreateNetwork so to have a 100% failure Signed-off-by: Flavio Crisciani --- libnetwork/drivers/bridge/bridge.go | 168 +++++++++---------- libnetwork/drivers/bridge/bridge_test.go | 42 +++++ libnetwork/drivers/bridge/setup_firewalld.go | 2 +- 3 files changed, 118 insertions(+), 94 deletions(-) diff --git a/libnetwork/drivers/bridge/bridge.go b/libnetwork/drivers/bridge/bridge.go index 64a2743dcf..1fa8f0e921 100644 --- a/libnetwork/drivers/bridge/bridge.go +++ b/libnetwork/drivers/bridge/bridge.go @@ -42,6 +42,14 @@ const ( DefaultGatewayV6AuxKey = "DefaultGatewayIPv6" ) +type defaultBridgeNetworkConflict struct { + ID string +} + +func (d defaultBridgeNetworkConflict) Error() string { + return fmt.Sprintf("Stale default bridge network %s", d.ID) +} + type iptableCleanFunc func() error type iptablesCleanFuncs []iptableCleanFunc @@ -137,6 +145,7 @@ type driver struct { networks map[string]*bridgeNetwork store datastore.DataStore nlh *netlink.Handle + configNetwork sync.Mutex sync.Mutex } @@ -322,41 +331,6 @@ func (n *bridgeNetwork) isolateNetwork(others []*bridgeNetwork, enable bool) err return nil } -// Checks whether this network's configuration for the network with this id conflicts with any of the passed networks -func (c *networkConfiguration) conflictsWithNetworks(id string, others []*bridgeNetwork) error { - for _, nw := range others { - - nw.Lock() - nwID := nw.id - nwConfig := nw.config - nwBridge := nw.bridge - nw.Unlock() - - if nwID == id { - continue - } - // Verify the name (which may have been set by newInterface()) does not conflict with - // existing bridge interfaces. Ironically the system chosen name gets stored in the config... - // Basically we are checking if the two original configs were both empty. - if nwConfig.BridgeName == c.BridgeName { - return types.ForbiddenErrorf("conflicts with network %s (%s) by bridge name", nwID, nwConfig.BridgeName) - } - // If this network config specifies the AddressIPv4, we need - // to make sure it does not conflict with any previously allocated - // bridges. This could not be completely caught by the config conflict - // check, because networks which config does not specify the AddressIPv4 - // get their address and subnet selected by the driver (see electBridgeIPv4()) - if c.AddressIPv4 != nil && nwBridge.bridgeIPv4 != nil { - if nwBridge.bridgeIPv4.Contains(c.AddressIPv4.IP) || - c.AddressIPv4.Contains(nwBridge.bridgeIPv4.IP) { - return types.ForbiddenErrorf("conflicts with network %s (%s) by ip network", nwID, nwConfig.BridgeName) - } - } - } - - return nil -} - func (d *driver) configure(option map[string]interface{}) error { var ( config *configuration @@ -602,11 +576,27 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d return err } - err = config.processIPAM(id, ipV4Data, ipV6Data) - if err != nil { + if err = config.processIPAM(id, ipV4Data, ipV6Data); err != nil { return err } + // start the critical section, from this point onward we are dealing with the list of networks + // so to be consistent we cannot allow that the list changes + d.configNetwork.Lock() + defer d.configNetwork.Unlock() + + // check network conflicts + if err = d.checkConflict(config); err != nil { + nerr, ok := err.(defaultBridgeNetworkConflict) + if !ok { + return err + } + // Got a conflict with a stale default network, clean that up and continue + logrus.Warn(nerr) + d.deleteNetwork(nerr.ID) + } + + // there is no conflict, now create the network if err = d.createNetwork(config); err != nil { return err } @@ -614,33 +604,47 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d return d.storeUpdate(config) } +func (d *driver) checkConflict(config *networkConfiguration) error { + networkList := d.getNetworks() + for _, nw := range networkList { + nw.Lock() + nwConfig := nw.config + nw.Unlock() + if err := nwConfig.Conflicts(config); err != nil { + if config.DefaultBridge { + // We encountered and identified a stale default network + // We must delete it as libnetwork is the source of truth + // The default network being created must be the only one + // This can happen only from docker 1.12 on ward + logrus.Infof("Found stale default bridge network %s (%s)", nwConfig.ID, nwConfig.BridgeName) + return defaultBridgeNetworkConflict{nwConfig.ID} + } + + return types.ForbiddenErrorf("cannot create network %s (%s): conflicts with network %s (%s): %s", + config.ID, config.BridgeName, nwConfig.ID, nwConfig.BridgeName, err.Error()) + } + } + return nil +} + func (d *driver) createNetwork(config *networkConfiguration) error { var err error defer osl.InitOSContext()() networkList := d.getNetworks() - for i, nw := range networkList { - nw.Lock() - nwConfig := nw.config - nw.Unlock() - if err := nwConfig.Conflicts(config); err != nil { - if config.DefaultBridge { - // We encountered and identified a stale default network - // We must delete it as libnetwork is the source of thruth - // The default network being created must be the only one - // This can happen only from docker 1.12 on ward - logrus.Infof("Removing stale default bridge network %s (%s)", nwConfig.ID, nwConfig.BridgeName) - if err := d.DeleteNetwork(nwConfig.ID); err != nil { - logrus.Warnf("Failed to remove stale default network: %s (%s): %v. Will remove from store.", nwConfig.ID, nwConfig.BridgeName, err) - d.storeDelete(nwConfig) - } - networkList = append(networkList[:i], networkList[i+1:]...) - } else { - return types.ForbiddenErrorf("cannot create network %s (%s): conflicts with network %s (%s): %s", - config.ID, config.BridgeName, nwConfig.ID, nwConfig.BridgeName, err.Error()) - } - } + + // Initialize handle when needed + d.Lock() + if d.nlh == nil { + d.nlh = ns.NlHandle() + } + d.Unlock() + + // Create or retrieve the bridge L3 interface + bridgeIface, err := newInterface(d.nlh, config) + if err != nil { + return err } // Create and set network handler in driver @@ -649,6 +653,7 @@ func (d *driver) createNetwork(config *networkConfiguration) error { endpoints: make(map[string]*bridgeEndpoint), config: config, portMapper: portmapper.New(d.config.UserlandProxyPath), + bridge: bridgeIface, driver: d, } @@ -665,35 +670,15 @@ func (d *driver) createNetwork(config *networkConfiguration) error { } }() - // Initialize handle when needed - d.Lock() - if d.nlh == nil { - d.nlh = ns.NlHandle() - } - d.Unlock() - - // Create or retrieve the bridge L3 interface - bridgeIface, err := newInterface(d.nlh, config) - if err != nil { - return err - } - network.bridge = bridgeIface - - // Verify the network configuration does not conflict with previously installed - // networks. This step is needed now because driver might have now set the bridge - // name on this config struct. And because we need to check for possible address - // conflicts, so we need to check against operationa lnetworks. - if err = config.conflictsWithNetworks(config.ID, networkList); err != nil { - return err - } - + // Add inter-network communication rules. setupNetworkIsolationRules := func(config *networkConfiguration, i *bridgeInterface) error { if err := network.isolateNetwork(networkList, true); err != nil { - if err := network.isolateNetwork(networkList, false); err != nil { + if err = network.isolateNetwork(networkList, false); err != nil { logrus.Warnf("Failed on removing the inter-network iptables rules on cleanup: %v", err) } return err } + // register the cleanup function network.registerIptCleanFunc(func() error { nwList := d.getNetworks() return network.isolateNetwork(nwList, false) @@ -763,18 +748,21 @@ func (d *driver) createNetwork(config *networkConfiguration) error { // Apply the prepared list of steps, and abort at the first error. bridgeSetup.queueStep(setupDeviceUp) - if err = bridgeSetup.apply(); err != nil { - return err - } - - return nil + return bridgeSetup.apply() } func (d *driver) DeleteNetwork(nid string) error { + + d.configNetwork.Lock() + defer d.configNetwork.Unlock() + + return d.deleteNetwork(nid) +} + +func (d *driver) deleteNetwork(nid string) error { var err error defer osl.InitOSContext()() - // Get network handler and remove it from driver d.Lock() n, ok := d.networks[nid] @@ -818,12 +806,6 @@ func (d *driver) DeleteNetwork(nid string) error { } }() - // Sanity check - if n == nil { - err = driverapi.ErrNoNetwork(nid) - return err - } - switch config.BridgeIfaceCreator { case ifaceCreatedByLibnetwork, ifaceCreatorUnknown: // We only delete the bridge if it was created by the bridge driver and diff --git a/libnetwork/drivers/bridge/bridge_test.go b/libnetwork/drivers/bridge/bridge_test.go index 3fbddc6e63..612844f78b 100644 --- a/libnetwork/drivers/bridge/bridge_test.go +++ b/libnetwork/drivers/bridge/bridge_test.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "regexp" + "strconv" "testing" "github.com/docker/libnetwork/driverapi" @@ -1074,3 +1075,44 @@ func TestCreateWithExistingBridge(t *testing.T) { t.Fatal("Deleting bridge network that using existing bridge interface unexpectedly deleted the bridge interface") } } + +func TestCreateParallel(t *testing.T) { + if !testutils.IsRunningInContainer() { + defer testutils.SetupTestOSContext(t)() + } + + d := newDriver() + + if err := d.configure(nil); err != nil { + t.Fatalf("Failed to setup driver config: %v", err) + } + + ch := make(chan error, 100) + for i := 0; i < 100; i++ { + go func(name string, ch chan<- error) { + config := &networkConfiguration{BridgeName: name} + genericOption := make(map[string]interface{}) + genericOption[netlabel.GenericData] = config + if err := d.CreateNetwork(name, genericOption, nil, getIPv4Data(t, "docker0"), nil); err != nil { + ch <- fmt.Errorf("failed to create %s", name) + return + } + if err := d.CreateNetwork(name, genericOption, nil, getIPv4Data(t, "docker0"), nil); err == nil { + ch <- fmt.Errorf("failed was able to create overlap %s", name) + return + } + ch <- nil + }("net"+strconv.Itoa(i), ch) + } + // wait for the go routines + var success int + for i := 0; i < 100; i++ { + val := <-ch + if val == nil { + success++ + } + } + if success != 1 { + t.Fatalf("Success should be 1 instead: %d", success) + } +} diff --git a/libnetwork/drivers/bridge/setup_firewalld.go b/libnetwork/drivers/bridge/setup_firewalld.go index fc45a7e983..50cbdb1ddc 100644 --- a/libnetwork/drivers/bridge/setup_firewalld.go +++ b/libnetwork/drivers/bridge/setup_firewalld.go @@ -9,7 +9,7 @@ func (n *bridgeNetwork) setupFirewalld(config *networkConfiguration, i *bridgeIn d.Unlock() // Sanity check. - if driverConfig.EnableIPTables == false { + if !driverConfig.EnableIPTables { return IPTableCfgError(config.BridgeName) }