From b7d0fefabc3ce1c415a48fda00e196be064d39f2 Mon Sep 17 00:00:00 2001 From: Jana Radhakrishnan Date: Thu, 10 Dec 2015 14:35:49 -0800 Subject: [PATCH] Add overlay network support for < 3.16 kernels Add support for overlay networking in older kernels. The following was done to achieve this: + Create the vxlan network in host namespace. + This may create conflicts with other private networks so check for conflicts and fail a join if there is any conflict. + Add iptables-based filtering to only allow subnet bridges in the same network to forward traffic while different network bridges will not be able to forward traffic between each other. Also block traffic to overlay network originating from the host itself. Signed-off-by: Jana Radhakrishnan --- libnetwork/drivers/overlay/filter.go | 119 ++++++++++++++++ libnetwork/drivers/overlay/ov_network.go | 133 +++++++++++++++--- libnetwork/drivers/overlay/ov_utils.go | 11 +- libnetwork/osl/interface_linux.go | 23 ++- libnetwork/osl/namespace_linux.go | 3 +- libnetwork/test/integration/dnet/helpers.bash | 96 +++++++++++++ .../integration/dnet/overlay-consul-host.bats | 9 ++ .../integration/dnet/run-integration-tests.sh | 17 ++- 8 files changed, 372 insertions(+), 39 deletions(-) create mode 100644 libnetwork/drivers/overlay/filter.go create mode 100644 libnetwork/test/integration/dnet/overlay-consul-host.bats diff --git a/libnetwork/drivers/overlay/filter.go b/libnetwork/drivers/overlay/filter.go new file mode 100644 index 0000000000..01b4d5b01b --- /dev/null +++ b/libnetwork/drivers/overlay/filter.go @@ -0,0 +1,119 @@ +package overlay + +import ( + "fmt" + "sync" + + "github.com/Sirupsen/logrus" + "github.com/docker/libnetwork/iptables" +) + +const globalChain = "DOCKER-OVERLAY" + +var filterOnce sync.Once + +func rawIPTables(args ...string) error { + if output, err := iptables.Raw(args...); err != nil { + return fmt.Errorf("unable to add overlay filter: %v", err) + } else if len(output) != 0 { + return fmt.Errorf("unable to add overlay filter: 
%s", string(output)) + } + + return nil +} + +func setupGlobalChain() { + if err := rawIPTables("-N", globalChain); err != nil { + logrus.Errorf("could not create global overlay chain: %v", err) + return + } + + if err := rawIPTables("-A", globalChain, "-j", "RETURN"); err != nil { + logrus.Errorf("could not install default return chain in the overlay global chain: %v", err) + return + } +} + +func setNetworkChain(cname string, remove bool) error { + // Initialize the one-time global overlay chain + filterOnce.Do(setupGlobalChain) + + opt := "-N" + // In case of remove, make sure to flush the rules in the chain + if remove { + if err := rawIPTables("-F", cname); err != nil { + return fmt.Errorf("failed to flush overlay network chain %s rules: %v", cname, err) + } + opt = "-X" + } + + if err := rawIPTables(opt, cname); err != nil { + return fmt.Errorf("failed network chain operation %q for chain %s: %v", opt, cname, err) + } + + if !remove { + if err := rawIPTables("-A", cname, "-j", "DROP"); err != nil { + return fmt.Errorf("failed adding default drop rule to overlay network chain %s: %v", cname, err) + } + } + + return nil +} + +func addNetworkChain(cname string) error { + return setNetworkChain(cname, false) +} + +func removeNetworkChain(cname string) error { + return setNetworkChain(cname, true) +} + +func setFilters(cname, brName string, remove bool) error { + opt := "-I" + if remove { + opt = "-D" + } + + // Every time we set filters for a new subnet, make sure to move the global overlay hook to the top of both the OUTPUT and FORWARD chains + if !remove { + for _, chain := range []string{"OUTPUT", "FORWARD"} { + exists := iptables.Exists(iptables.Filter, chain, "-j", globalChain) + if exists { + if err := rawIPTables("-D", chain, "-j", globalChain); err != nil { + return fmt.Errorf("failed to delete overlay hook in chain %s while moving the hook: %v", chain, err) + } + } + + if err := rawIPTables("-I", chain, "-j", globalChain); err != nil { + return 
fmt.Errorf("failed to insert overlay hook in chain %s: %v", chain, err) + } + } + } + + // Insert/Delete the rule to jump to per-bridge chain + exists := iptables.Exists(iptables.Filter, globalChain, "-o", brName, "-j", cname) + if (!remove && !exists) || (remove && exists) { + if err := rawIPTables(opt, globalChain, "-o", brName, "-j", cname); err != nil { + return fmt.Errorf("failed to add per-bridge filter rule for bridge %s, network chain %s: %v", brName, cname, err) + } + } + + exists = iptables.Exists(iptables.Filter, cname, "-i", brName, "-j", "ACCEPT") + if (!remove && exists) || (remove && !exists) { + return nil + } + + if err := rawIPTables(opt, cname, "-i", brName, "-j", "ACCEPT"); err != nil { + return fmt.Errorf("failed to add overlay filter rile for network chain %s, bridge %s: %v", cname, brName, err) + } + + return nil +} + +func addFilters(cname, brName string) error { + return setFilters(cname, brName, false) +} + +func removeFilters(cname, brName string) error { + return setFilters(cname, brName, true) +} diff --git a/libnetwork/drivers/overlay/ov_network.go b/libnetwork/drivers/overlay/ov_network.go index e67757b4f0..a1cf5f1f19 100644 --- a/libnetwork/drivers/overlay/ov_network.go +++ b/libnetwork/drivers/overlay/ov_network.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "net" + "os" "sync" "syscall" @@ -12,11 +13,17 @@ import ( "github.com/docker/libnetwork/driverapi" "github.com/docker/libnetwork/netutils" "github.com/docker/libnetwork/osl" + "github.com/docker/libnetwork/resolvconf" "github.com/docker/libnetwork/types" "github.com/vishvananda/netlink" "github.com/vishvananda/netlink/nl" ) +var ( + hostMode bool + hostModeOnce sync.Once +) + type networkTable map[string]*network type subnet struct { @@ -87,22 +94,6 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Dat return nil } -/* func (d *driver) createNetworkfromStore(nid string) (*network, error) { - n := &network{ - id: nid, - driver: d, - endpoints: 
endpointTable{}, - once: &sync.Once{}, - subnets: []*subnet{}, - } - - err := d.store.GetObject(datastore.Key(n.Key()...), n) - if err != nil { - return nil, fmt.Errorf("unable to get network %q from data store, %v", nid, err) - } - return n, nil -}*/ - func (d *driver) DeleteNetwork(nid string) error { if nid == "" { return fmt.Errorf("invalid network id") @@ -171,6 +162,12 @@ func (n *network) destroySandbox() { } for _, s := range n.subnets { + if hostMode { + if err := removeFilters(n.id[:12], s.brName); err != nil { + logrus.Warnf("Could not remove overlay filters: %v", err) + } + } + if s.vxlanName != "" { err := deleteVxlan(s.vxlanName) if err != nil { @@ -178,17 +175,88 @@ func (n *network) destroySandbox() { } } } + + if hostMode { + if err := removeNetworkChain(n.id[:12]); err != nil { + logrus.Warnf("could not remove network chain: %v", err) + } + } + sbox.Destroy() n.setSandbox(nil) } } -func (n *network) initSubnetSandbox(s *subnet) error { - // create a bridge and vxlan device for this subnet and move it to the sandbox - brName, err := netutils.GenerateIfaceName("bridge", 7) - if err != nil { - return err +func setHostMode() { + if os.Getenv("_OVERLAY_HOST_MODE") != "" { + hostMode = true + return } + + err := createVxlan("testvxlan", 1) + if err != nil { + logrus.Errorf("Failed to create testvxlan interface: %v", err) + return + } + + defer deleteVxlan("testvxlan") + + path := "/proc/self/ns/net" + f, err := os.OpenFile(path, os.O_RDONLY, 0) + if err != nil { + logrus.Errorf("Failed to open path %s for network namespace for setting host mode: %v", path, err) + return + } + defer f.Close() + + nsFD := f.Fd() + + iface, err := netlink.LinkByName("testvxlan") + if err != nil { + logrus.Errorf("Failed to get link testvxlan: %v", err) + return + } + + // If we are not able to move the vxlan interface to a namespace + // then fallback to host mode + if err := netlink.LinkSetNsFd(iface, int(nsFD)); err != nil { + hostMode = true + } +} + +func (n *network) 
generateVxlanName(s *subnet) string { + return "vx-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + n.id[:5] +} + +func (n *network) generateBridgeName(s *subnet) string { + return "ov-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + n.id[:5] +} + +func isOverlap(nw *net.IPNet) bool { + var nameservers []string + + if rc, err := resolvconf.Get(); err == nil { + nameservers = resolvconf.GetNameserversAsCIDR(rc.Content) + } + + if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil { + return true + } + + if err := netutils.CheckRouteOverlaps(nw); err != nil { + return true + } + + return false +} + +func (n *network) initSubnetSandbox(s *subnet) error { + if hostMode && isOverlap(s.subnetIP) { + return fmt.Errorf("overlay subnet %s has conflicts in the host while running in host mode", s.subnetIP.String()) + } + + // create a bridge and vxlan device for this subnet and move it to the sandbox + brName := n.generateBridgeName(s) sbox := n.sandbox() if err := sbox.AddInterface(brName, "br", @@ -197,7 +265,12 @@ func (n *network) initSubnetSandbox(s *subnet) error { return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err) } - vxlanName, err := createVxlan(n.vxlanID(s)) + vxlanName := n.generateVxlanName(s) + + // Try to delete the vxlan interface if already present + deleteVxlan(vxlanName) + + err := createVxlan(vxlanName, n.vxlanID(s)) if err != nil { return err } @@ -207,6 +280,12 @@ func (n *network) initSubnetSandbox(s *subnet) error { return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err) } + if hostMode { + if err := addFilters(n.id[:12], brName); err != nil { + return err + } + } + n.Lock() s.vxlanName = vxlanName s.brName = brName @@ -220,8 +299,16 @@ func (n *network) initSandbox() error { n.initEpoch++ n.Unlock() + hostModeOnce.Do(setHostMode) + + if hostMode { + if err := addNetworkChain(n.id[:12]); err != nil { + return err + } + } + sbox, err := osl.NewSandbox( - 
osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch)+n.id), true) + osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch)+n.id), !hostMode) if err != nil { return fmt.Errorf("could not create network sandbox: %v", err) } diff --git a/libnetwork/drivers/overlay/ov_utils.go b/libnetwork/drivers/overlay/ov_utils.go index 5bb257bf51..0b561e7a75 100644 --- a/libnetwork/drivers/overlay/ov_utils.go +++ b/libnetwork/drivers/overlay/ov_utils.go @@ -47,14 +47,9 @@ func createVethPair() (string, string, error) { return name1, name2, nil } -func createVxlan(vni uint32) (string, error) { +func createVxlan(name string, vni uint32) error { defer osl.InitOSContext()() - name, err := netutils.GenerateIfaceName("vxlan", 7) - if err != nil { - return "", fmt.Errorf("error generating vxlan name: %v", err) - } - vxlan := &netlink.Vxlan{ LinkAttrs: netlink.LinkAttrs{Name: name}, VxlanId: int(vni), @@ -66,10 +61,10 @@ func createVxlan(vni uint32) (string, error) { } if err := netlink.LinkAdd(vxlan); err != nil { - return "", fmt.Errorf("error creating vxlan interface: %v", err) + return fmt.Errorf("error creating vxlan interface: %v", err) } - return name, nil + return nil } func deleteVxlan(name string) error { diff --git a/libnetwork/osl/interface_linux.go b/libnetwork/osl/interface_linux.go index d57e7601b1..720bf09764 100644 --- a/libnetwork/osl/interface_linux.go +++ b/libnetwork/osl/interface_linux.go @@ -109,6 +109,7 @@ func (i *nwIface) Remove() error { n.Lock() path := n.path + isDefault := n.isDefault n.Unlock() return nsInvoke(path, func(nsFD int) error { return nil }, func(callerFD int) error { @@ -134,7 +135,7 @@ func (i *nwIface) Remove() error { if err := netlink.LinkDel(iface); err != nil { return fmt.Errorf("failed deleting bridge %q: %v", i.SrcName(), err) } - } else { + } else if !isDefault { // Move the network interface to caller namespace. 
if err := netlink.LinkSetNsFd(iface, callerFD); err != nil { fmt.Println("LinkSetNsPid failed: ", err) @@ -213,9 +214,15 @@ func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...If } n.Lock() - i.dstName = fmt.Sprintf("%s%d", i.dstName, n.nextIfIndex) - n.nextIfIndex++ + if n.isDefault { + i.dstName = i.srcName + } else { + i.dstName = fmt.Sprintf("%s%d", i.dstName, n.nextIfIndex) + n.nextIfIndex++ + } + path := n.path + isDefault := n.isDefault n.Unlock() return nsInvoke(path, func(nsFD int) error { @@ -231,9 +238,13 @@ func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...If return fmt.Errorf("failed to get link by name %q: %v", i.srcName, err) } - // Move the network interface to the destination namespace. - if err := netlink.LinkSetNsFd(iface, nsFD); err != nil { - return fmt.Errorf("failed to set namespace on link %q: %v", i.srcName, err) + // Move the network interface to the destination + // namespace only if the namespace is not a default + // type + if !isDefault { + if err := netlink.LinkSetNsFd(iface, nsFD); err != nil { + return fmt.Errorf("failed to set namespace on link %q: %v", i.srcName, err) + } } return nil diff --git a/libnetwork/osl/namespace_linux.go b/libnetwork/osl/namespace_linux.go index 1b7b230380..07b725c290 100644 --- a/libnetwork/osl/namespace_linux.go +++ b/libnetwork/osl/namespace_linux.go @@ -41,6 +41,7 @@ type networkNamespace struct { staticRoutes []*types.StaticRoute neighbors []*neigh nextIfIndex int + isDefault bool sync.Mutex } @@ -146,7 +147,7 @@ func NewSandbox(key string, osCreate bool) (Sandbox, error) { return nil, err } - return &networkNamespace{path: key}, nil + return &networkNamespace{path: key, isDefault: !osCreate}, nil } func (n *networkNamespace) InterfaceOptions() IfaceOptionSetter { diff --git a/libnetwork/test/integration/dnet/helpers.bash b/libnetwork/test/integration/dnet/helpers.bash index 5f85bd20a5..e1e2fc70e5 100644 --- 
a/libnetwork/test/integration/dnet/helpers.bash +++ b/libnetwork/test/integration/dnet/helpers.bash @@ -163,6 +163,7 @@ EOF --name=${name} \ --privileged \ -p ${hport}:${cport} \ + -e _OVERLAY_HOST_MODE \ -v $(pwd)/:/go/src/github.com/docker/libnetwork \ -v /tmp:/tmp \ -v $(pwd)/${TMPC_ROOT}:/scratch \ @@ -215,6 +216,21 @@ function runc() { dnet_exec ${dnet} "umount /var/run/netns/c && rm /var/run/netns/c" } +function runc_nofail() { + local dnet + + dnet=${1} + shift + dnet_exec ${dnet} "cp /var/lib/docker/network/files/${1}*/* /scratch/rootfs/etc" + dnet_exec ${dnet} "mkdir -p /var/run/netns" + dnet_exec ${dnet} "touch /var/run/netns/c && mount -o bind /var/run/docker/netns/${1} /var/run/netns/c" + set +e + dnet_exec ${dnet} "ip netns exec c unshare -fmuip --mount-proc chroot \"/scratch/rootfs\" /bin/sh -c \"/bin/mount -t proc proc /proc && ${2}\"" + status="$?" + set -e + dnet_exec ${dnet} "umount /var/run/netns/c && rm /var/run/netns/c" +} + function start_etcd() { local bridge_ip stop_etcd @@ -442,3 +458,83 @@ function test_overlay_singlehost() { dnet_cmd $(inst_id2port 1) network rm multihost } + +function test_overlay_hostmode() { + dnet_suffix=$1 + shift + + echo $(docker ps) + + start=1 + end=2 + # Set up overlay networks and connect containers to them + dnet_cmd $(inst_id2port 1) network create -d overlay multihost1 + dnet_cmd $(inst_id2port 1) network create -d overlay multihost2 + dnet_cmd $(inst_id2port 1) network ls + + for i in `seq ${start} ${end}`; + do + dnet_cmd $(inst_id2port 1) container create mh1_${i} + net_connect 1 mh1_${i} multihost1 + done + + for i in `seq ${start} ${end}`; + do + dnet_cmd $(inst_id2port 1) container create mh2_${i} + net_connect 1 mh2_${i} multihost2 + done + + # Now test connectivity between all the containers using service names + for i in `seq ${start} ${end}`; + do + for j in `seq ${start} ${end}`; + do + if [ "$i" -eq "$j" ]; then + continue + fi + + # Find the IP addresses of the j containers on both networks + hrun 
runc $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh1_${i}) "nslookup mh1_$j" + mh1_j_ip=$(echo ${output} | awk '{print $11}') + + hrun runc $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh2_${i}) "nslookup mh2_$j" + mh2_j_ip=$(echo ${output} | awk '{print $11}') + + # Ping the j containers in the same network and ensure they are successful + runc $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh1_${i}) \ + "ping -c 1 mh1_$j" + runc $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh2_${i}) \ + "ping -c 1 mh2_$j" + + # Try pinging j container IPs from the container in the other network and make sure that they are not successful + runc_nofail $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh1_${i}) "ping -c 1 ${mh2_j_ip}" + [ "${status}" -ne 0 ] + + runc_nofail $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh2_${i}) "ping -c 1 ${mh1_j_ip}" + [ "${status}" -ne 0 ] + + # Try pinging the j container IPs from the host (the dnet container in this case) and make sure that they are not successful + hrun docker exec -it $(dnet_container_name 1 $dnet_suffix) "ping -c 1 ${mh1_j_ip}" + [ "${status}" -ne 0 ] + + hrun docker exec -it $(dnet_container_name 1 $dnet_suffix) "ping -c 1 ${mh2_j_ip}" + [ "${status}" -ne 0 ] + done + done + + # Tear down the container connections and the networks + for i in `seq ${start} ${end}`; + do + net_disconnect 1 mh1_${i} multihost1 + dnet_cmd $(inst_id2port 1) container rm mh1_${i} + done + + for i in `seq ${start} ${end}`; + do + net_disconnect 1 mh2_${i} multihost2 + dnet_cmd $(inst_id2port 1) container rm mh2_${i} + done + + dnet_cmd $(inst_id2port 1) network rm multihost1 + dnet_cmd $(inst_id2port 1) network rm multihost2 +} diff --git a/libnetwork/test/integration/dnet/overlay-consul-host.bats b/libnetwork/test/integration/dnet/overlay-consul-host.bats new file mode 100644 index 0000000000..f80c630313 --- /dev/null +++ b/libnetwork/test/integration/dnet/overlay-consul-host.bats @@ -0,0 +1,9 @@ +# -*- 
mode: sh -*- +#!/usr/bin/env bats + +load helpers + +@test "Test overlay network hostmode with consul" { + skip_for_circleci + test_overlay_hostmode consul +} diff --git a/libnetwork/test/integration/dnet/run-integration-tests.sh b/libnetwork/test/integration/dnet/run-integration-tests.sh index 9184025c25..a5bf42668a 100755 --- a/libnetwork/test/integration/dnet/run-integration-tests.sh +++ b/libnetwork/test/integration/dnet/run-integration-tests.sh @@ -56,6 +56,21 @@ function run_overlay_consul_tests() { unset cmap[dnet-3-consul] } +function run_overlay_consul_host_tests() { + export _OVERLAY_HOST_MODE="true" + ## Setup + start_dnet 1 consul 1>>${INTEGRATION_ROOT}/test.log 2>&1 + cmap[dnet-1-consul]=dnet-1-consul + + ## Run the test cases + ./integration-tmp/bin/bats ./test/integration/dnet/overlay-consul-host.bats + + ## Teardown + stop_dnet 1 consul 1>>${INTEGRATION_ROOT}/test.log 2>&1 + unset cmap[dnet-1-consul] + unset _OVERLAY_HOST_MODE +} + function run_overlay_zk_tests() { ## Test overlay network with zookeeper start_dnet 1 zookeeper 1>>${INTEGRATION_ROOT}/test.log 2>&1 @@ -207,7 +222,7 @@ if [ -z "$SUITES" ]; then # old kernel and limited docker environment. suites="dnet simple_consul multi_consul multi_zk multi_etcd" else - suites="dnet simple_consul multi_consul multi_zk multi_etcd bridge overlay_consul overlay_zk overlay_etcd" + suites="dnet simple_consul multi_consul multi_zk multi_etcd bridge overlay_consul overlay_consul_host overlay_zk overlay_etcd" fi else suites="$SUITES"