mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
![Andrea Luzzardi](/assets/img/avatar_default.png)
Right now, MAC addresses are randomly generated by the kernel when creating the veth interfaces. This causes different issues related to ARP, such as #4581, #5737 and #8269. This change adds support for consistent MAC addresses, guaranteeing that an IP address will always end up with the same MAC address, no matter what. Since IP addresses are already guaranteed to be unique by the IPAllocator, MAC addresses will inherit this property as well for free. Consistent MAC addresses are also a requirement for stable networking (#8297), since re-using the same IP address with a different MAC address triggers the ARP issue. Finally, this change makes the MAC address accessible through docker inspect, which fixes #4033. Signed-off-by: Andrea Luzzardi <aluzzardi@gmail.com>
544 lines
16 KiB
Go
544 lines
16 KiB
Go
package bridge
|
|
|
|
import (
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/docker/docker/daemon/networkdriver"
|
|
"github.com/docker/docker/daemon/networkdriver/ipallocator"
|
|
"github.com/docker/docker/daemon/networkdriver/portallocator"
|
|
"github.com/docker/docker/daemon/networkdriver/portmapper"
|
|
"github.com/docker/docker/engine"
|
|
"github.com/docker/docker/pkg/iptables"
|
|
"github.com/docker/docker/pkg/log"
|
|
"github.com/docker/docker/pkg/networkfs/resolvconf"
|
|
"github.com/docker/docker/pkg/parsers/kernel"
|
|
"github.com/docker/libcontainer/netlink"
|
|
)
|
|
|
|
// Defaults used by the bridge network driver.
const (
	// DefaultNetworkBridge is the bridge interface name used by InitDriver
	// when the "BridgeIface" job variable is empty.
	DefaultNetworkBridge = "docker0"

	// MaxAllocatedPortAttempts bounds the number of times AllocatePort calls
	// portmapper.Map before giving up.
	MaxAllocatedPortAttempts = 10
)
|
|
|
|
// networkInterface represents the networking stack of a container: the IP
// it was given and the host addresses mapped to it.
type networkInterface struct {
	IP           net.IP
	PortMappings []net.Addr // mappings to the host interfaces
}

// ifaces is a mutex-guarded collection of network interfaces indexed by a
// string key.
type ifaces struct {
	c map[string]*networkInterface
	sync.Mutex
}

// Set stores n under key, replacing whatever was stored there before.
func (i *ifaces) Set(key string, n *networkInterface) {
	i.Lock()
	defer i.Unlock()

	i.c[key] = n
}

// Get returns the interface stored under key, or nil if there is none.
func (i *ifaces) Get(key string) *networkInterface {
	i.Lock()
	defer i.Unlock()

	return i.c[key]
}
|
|
|
|
var (
|
|
addrs = []string{
|
|
// Here we don't follow the convention of using the 1st IP of the range for the gateway.
|
|
// This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges.
|
|
// In theory this shouldn't matter - in practice there's bound to be a few scripts relying
|
|
// on the internal addressing or other stupid things like that.
|
|
// They shouldn't, but hey, let's not break them unless we really have to.
|
|
"172.17.42.1/16", // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23
|
|
"10.0.42.1/16", // Don't even try using the entire /8, that's too intrusive
|
|
"10.1.42.1/16",
|
|
"10.42.42.1/16",
|
|
"172.16.42.1/24",
|
|
"172.16.43.1/24",
|
|
"172.16.44.1/24",
|
|
"10.0.42.1/24",
|
|
"10.0.43.1/24",
|
|
"192.168.42.1/24",
|
|
"192.168.43.1/24",
|
|
"192.168.44.1/24",
|
|
}
|
|
|
|
bridgeIface string
|
|
bridgeNetwork *net.IPNet
|
|
|
|
defaultBindingIP = net.ParseIP("0.0.0.0")
|
|
currentInterfaces = ifaces{c: make(map[string]*networkInterface)}
|
|
)
|
|
|
|
func InitDriver(job *engine.Job) engine.Status {
|
|
var (
|
|
network *net.IPNet
|
|
enableIPTables = job.GetenvBool("EnableIptables")
|
|
icc = job.GetenvBool("InterContainerCommunication")
|
|
ipMasq = job.GetenvBool("EnableIpMasq")
|
|
ipForward = job.GetenvBool("EnableIpForward")
|
|
bridgeIP = job.Getenv("BridgeIP")
|
|
fixedCIDR = job.Getenv("FixedCIDR")
|
|
)
|
|
|
|
if defaultIP := job.Getenv("DefaultBindingIP"); defaultIP != "" {
|
|
defaultBindingIP = net.ParseIP(defaultIP)
|
|
}
|
|
|
|
bridgeIface = job.Getenv("BridgeIface")
|
|
usingDefaultBridge := false
|
|
if bridgeIface == "" {
|
|
usingDefaultBridge = true
|
|
bridgeIface = DefaultNetworkBridge
|
|
}
|
|
|
|
addr, err := networkdriver.GetIfaceAddr(bridgeIface)
|
|
if err != nil {
|
|
// If we're not using the default bridge, fail without trying to create it
|
|
if !usingDefaultBridge {
|
|
return job.Error(err)
|
|
}
|
|
// If the iface is not found, try to create it
|
|
if err := createBridge(bridgeIP); err != nil {
|
|
return job.Error(err)
|
|
}
|
|
|
|
addr, err = networkdriver.GetIfaceAddr(bridgeIface)
|
|
if err != nil {
|
|
return job.Error(err)
|
|
}
|
|
network = addr.(*net.IPNet)
|
|
} else {
|
|
network = addr.(*net.IPNet)
|
|
// validate that the bridge ip matches the ip specified by BridgeIP
|
|
if bridgeIP != "" {
|
|
bip, _, err := net.ParseCIDR(bridgeIP)
|
|
if err != nil {
|
|
return job.Error(err)
|
|
}
|
|
if !network.IP.Equal(bip) {
|
|
return job.Errorf("bridge ip (%s) does not match existing bridge configuration %s", network.IP, bip)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Configure iptables for link support
|
|
if enableIPTables {
|
|
if err := setupIPTables(addr, icc, ipMasq); err != nil {
|
|
return job.Error(err)
|
|
}
|
|
}
|
|
|
|
if ipForward {
|
|
// Enable IPv4 forwarding
|
|
if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte{'1', '\n'}, 0644); err != nil {
|
|
job.Logf("WARNING: unable to enable IPv4 forwarding: %s\n", err)
|
|
}
|
|
}
|
|
|
|
// We can always try removing the iptables
|
|
if err := iptables.RemoveExistingChain("DOCKER"); err != nil {
|
|
return job.Error(err)
|
|
}
|
|
|
|
if enableIPTables {
|
|
chain, err := iptables.NewChain("DOCKER", bridgeIface)
|
|
if err != nil {
|
|
return job.Error(err)
|
|
}
|
|
portmapper.SetIptablesChain(chain)
|
|
}
|
|
|
|
bridgeNetwork = network
|
|
if fixedCIDR != "" {
|
|
_, subnet, err := net.ParseCIDR(fixedCIDR)
|
|
if err != nil {
|
|
return job.Error(err)
|
|
}
|
|
log.Debugf("Subnet: %v", subnet)
|
|
if err := ipallocator.RegisterSubnet(bridgeNetwork, subnet); err != nil {
|
|
return job.Error(err)
|
|
}
|
|
}
|
|
|
|
// https://github.com/docker/docker/issues/2768
|
|
job.Eng.Hack_SetGlobalVar("httpapi.bridgeIP", bridgeNetwork.IP)
|
|
|
|
for name, f := range map[string]engine.Handler{
|
|
"allocate_interface": Allocate,
|
|
"release_interface": Release,
|
|
"allocate_port": AllocatePort,
|
|
"link": LinkContainers,
|
|
} {
|
|
if err := job.Eng.Register(name, f); err != nil {
|
|
return job.Error(err)
|
|
}
|
|
}
|
|
return engine.StatusOK
|
|
}
|
|
|
|
func setupIPTables(addr net.Addr, icc, ipmasq bool) error {
|
|
// Enable NAT
|
|
|
|
if ipmasq {
|
|
natArgs := []string{"POSTROUTING", "-t", "nat", "-s", addr.String(), "!", "-o", bridgeIface, "-j", "MASQUERADE"}
|
|
|
|
if !iptables.Exists(natArgs...) {
|
|
if output, err := iptables.Raw(append([]string{"-I"}, natArgs...)...); err != nil {
|
|
return fmt.Errorf("Unable to enable network bridge NAT: %s", err)
|
|
} else if len(output) != 0 {
|
|
return fmt.Errorf("Error iptables postrouting: %s", output)
|
|
}
|
|
}
|
|
}
|
|
|
|
var (
|
|
args = []string{"FORWARD", "-i", bridgeIface, "-o", bridgeIface, "-j"}
|
|
acceptArgs = append(args, "ACCEPT")
|
|
dropArgs = append(args, "DROP")
|
|
)
|
|
|
|
if !icc {
|
|
iptables.Raw(append([]string{"-D"}, acceptArgs...)...)
|
|
|
|
if !iptables.Exists(dropArgs...) {
|
|
log.Debugf("Disable inter-container communication")
|
|
if output, err := iptables.Raw(append([]string{"-I"}, dropArgs...)...); err != nil {
|
|
return fmt.Errorf("Unable to prevent intercontainer communication: %s", err)
|
|
} else if len(output) != 0 {
|
|
return fmt.Errorf("Error disabling intercontainer communication: %s", output)
|
|
}
|
|
}
|
|
} else {
|
|
iptables.Raw(append([]string{"-D"}, dropArgs...)...)
|
|
|
|
if !iptables.Exists(acceptArgs...) {
|
|
log.Debugf("Enable inter-container communication")
|
|
if output, err := iptables.Raw(append([]string{"-I"}, acceptArgs...)...); err != nil {
|
|
return fmt.Errorf("Unable to allow intercontainer communication: %s", err)
|
|
} else if len(output) != 0 {
|
|
return fmt.Errorf("Error enabling intercontainer communication: %s", output)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Accept all non-intercontainer outgoing packets
|
|
outgoingArgs := []string{"FORWARD", "-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"}
|
|
if !iptables.Exists(outgoingArgs...) {
|
|
if output, err := iptables.Raw(append([]string{"-I"}, outgoingArgs...)...); err != nil {
|
|
return fmt.Errorf("Unable to allow outgoing packets: %s", err)
|
|
} else if len(output) != 0 {
|
|
return fmt.Errorf("Error iptables allow outgoing: %s", output)
|
|
}
|
|
}
|
|
|
|
// Accept incoming packets for existing connections
|
|
existingArgs := []string{"FORWARD", "-o", bridgeIface, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"}
|
|
|
|
if !iptables.Exists(existingArgs...) {
|
|
if output, err := iptables.Raw(append([]string{"-I"}, existingArgs...)...); err != nil {
|
|
return fmt.Errorf("Unable to allow incoming packets: %s", err)
|
|
} else if len(output) != 0 {
|
|
return fmt.Errorf("Error iptables allow incoming: %s", output)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// CreateBridgeIface creates a network bridge interface on the host system with the name `ifaceName`,
|
|
// and attempts to configure it with an address which doesn't conflict with any other interface on the host.
|
|
// If it can't find an address which doesn't conflict, it will return an error.
|
|
func createBridge(bridgeIP string) error {
|
|
nameservers := []string{}
|
|
resolvConf, _ := resolvconf.Get()
|
|
// we don't check for an error here, because we don't really care
|
|
// if we can't read /etc/resolv.conf. So instead we skip the append
|
|
// if resolvConf is nil. It either doesn't exist, or we can't read it
|
|
// for some reason.
|
|
if resolvConf != nil {
|
|
nameservers = append(nameservers, resolvconf.GetNameserversAsCIDR(resolvConf)...)
|
|
}
|
|
|
|
var ifaceAddr string
|
|
if len(bridgeIP) != 0 {
|
|
_, _, err := net.ParseCIDR(bridgeIP)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ifaceAddr = bridgeIP
|
|
} else {
|
|
for _, addr := range addrs {
|
|
_, dockerNetwork, err := net.ParseCIDR(addr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := networkdriver.CheckNameserverOverlaps(nameservers, dockerNetwork); err == nil {
|
|
if err := networkdriver.CheckRouteOverlaps(dockerNetwork); err == nil {
|
|
ifaceAddr = addr
|
|
break
|
|
} else {
|
|
log.Debugf("%s %s", addr, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ifaceAddr == "" {
|
|
return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", bridgeIface, bridgeIface)
|
|
}
|
|
log.Debugf("Creating bridge %s with network %s", bridgeIface, ifaceAddr)
|
|
|
|
if err := createBridgeIface(bridgeIface); err != nil {
|
|
return err
|
|
}
|
|
|
|
iface, err := net.InterfaceByName(bridgeIface)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
ipAddr, ipNet, err := net.ParseCIDR(ifaceAddr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if netlink.NetworkLinkAddIp(iface, ipAddr, ipNet); err != nil {
|
|
return fmt.Errorf("Unable to add private network: %s", err)
|
|
}
|
|
if err := netlink.NetworkLinkUp(iface); err != nil {
|
|
return fmt.Errorf("Unable to start network bridge: %s", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func createBridgeIface(name string) error {
|
|
kv, err := kernel.GetKernelVersion()
|
|
// only set the bridge's mac address if the kernel version is > 3.3
|
|
// before that it was not supported
|
|
setBridgeMacAddr := err == nil && (kv.Kernel >= 3 && kv.Major >= 3)
|
|
log.Debugf("setting bridge mac address = %v", setBridgeMacAddr)
|
|
return netlink.CreateBridge(name, setBridgeMacAddr)
|
|
}
|
|
|
|
// generateMacAddr derives an IEEE 802 compliant MAC address from ip.
//
// The result is deterministic: the same IP always yields the same MAC
// address, which avoids ARP cache issues.
//
// Layout of the returned 6 bytes:
//   - byte 0 is 0x02: unicast (least-significant bit 0), locally administered
//     (second-least-significant bit 1), and otherwise as small as possible
//     because the veth address has to be "smaller" than the bridge address;
//   - byte 1 is 0x42: an arbitrary choice, allowed because the address is
//     locally administered;
//   - bytes 2-5 are the 4-byte form of ip, which keeps the MAC as unique as
//     the IP itself. They stay zero when ip has no 4-byte form.
func generateMacAddr(ip net.IP) net.HardwareAddr {
	mac := net.HardwareAddr{0x02, 0x42, 0x00, 0x00, 0x00, 0x00}
	copy(mac[2:], ip.To4())
	return mac
}
|
|
|
|
// Allocate a network interface
|
|
func Allocate(job *engine.Job) engine.Status {
|
|
var (
|
|
ip net.IP
|
|
mac net.HardwareAddr
|
|
err error
|
|
id = job.Args[0]
|
|
requestedIP = net.ParseIP(job.Getenv("RequestedIP"))
|
|
)
|
|
|
|
if requestedIP != nil {
|
|
ip, err = ipallocator.RequestIP(bridgeNetwork, requestedIP)
|
|
} else {
|
|
ip, err = ipallocator.RequestIP(bridgeNetwork, nil)
|
|
}
|
|
if err != nil {
|
|
return job.Error(err)
|
|
}
|
|
|
|
// If no explicit mac address was given, generate a random one.
|
|
if mac, err = net.ParseMAC(job.Getenv("RequestedMac")); err != nil {
|
|
mac = generateMacAddr(ip)
|
|
}
|
|
|
|
out := engine.Env{}
|
|
out.Set("IP", ip.String())
|
|
out.Set("Mask", bridgeNetwork.Mask.String())
|
|
out.Set("Gateway", bridgeNetwork.IP.String())
|
|
out.Set("MacAddress", mac.String())
|
|
out.Set("Bridge", bridgeIface)
|
|
|
|
size, _ := bridgeNetwork.Mask.Size()
|
|
out.SetInt("IPPrefixLen", size)
|
|
|
|
currentInterfaces.Set(id, &networkInterface{
|
|
IP: ip,
|
|
})
|
|
|
|
out.WriteTo(job.Stdout)
|
|
|
|
return engine.StatusOK
|
|
}
|
|
|
|
// release an interface for a select ip
|
|
func Release(job *engine.Job) engine.Status {
|
|
var (
|
|
id = job.Args[0]
|
|
containerInterface = currentInterfaces.Get(id)
|
|
)
|
|
|
|
if containerInterface == nil {
|
|
return job.Errorf("No network information to release for %s", id)
|
|
}
|
|
|
|
for _, nat := range containerInterface.PortMappings {
|
|
if err := portmapper.Unmap(nat); err != nil {
|
|
log.Infof("Unable to unmap port %s: %s", nat, err)
|
|
}
|
|
}
|
|
|
|
if err := ipallocator.ReleaseIP(bridgeNetwork, containerInterface.IP); err != nil {
|
|
log.Infof("Unable to release ip %s", err)
|
|
}
|
|
return engine.StatusOK
|
|
}
|
|
|
|
// Allocate an external port and map it to the interface
|
|
func AllocatePort(job *engine.Job) engine.Status {
|
|
var (
|
|
err error
|
|
|
|
ip = defaultBindingIP
|
|
id = job.Args[0]
|
|
hostIP = job.Getenv("HostIP")
|
|
hostPort = job.GetenvInt("HostPort")
|
|
containerPort = job.GetenvInt("ContainerPort")
|
|
proto = job.Getenv("Proto")
|
|
network = currentInterfaces.Get(id)
|
|
)
|
|
|
|
if hostIP != "" {
|
|
ip = net.ParseIP(hostIP)
|
|
if ip == nil {
|
|
return job.Errorf("Bad parameter: invalid host ip %s", hostIP)
|
|
}
|
|
}
|
|
|
|
// host ip, proto, and host port
|
|
var container net.Addr
|
|
switch proto {
|
|
case "tcp":
|
|
container = &net.TCPAddr{IP: network.IP, Port: containerPort}
|
|
case "udp":
|
|
container = &net.UDPAddr{IP: network.IP, Port: containerPort}
|
|
default:
|
|
return job.Errorf("unsupported address type %s", proto)
|
|
}
|
|
|
|
//
|
|
// Try up to 10 times to get a port that's not already allocated.
|
|
//
|
|
// In the event of failure to bind, return the error that portmapper.Map
|
|
// yields.
|
|
//
|
|
|
|
var host net.Addr
|
|
for i := 0; i < MaxAllocatedPortAttempts; i++ {
|
|
if host, err = portmapper.Map(container, ip, hostPort); err == nil {
|
|
break
|
|
}
|
|
|
|
if allocerr, ok := err.(portallocator.ErrPortAlreadyAllocated); ok {
|
|
// There is no point in immediately retrying to map an explicitly
|
|
// chosen port.
|
|
if hostPort != 0 {
|
|
job.Logf("Failed to bind %s for container address %s: %s", allocerr.IPPort(), container.String(), allocerr.Error())
|
|
break
|
|
}
|
|
|
|
// Automatically chosen 'free' port failed to bind: move on the next.
|
|
job.Logf("Failed to bind %s for container address %s. Trying another port.", allocerr.IPPort(), container.String())
|
|
} else {
|
|
// some other error during mapping
|
|
job.Logf("Received an unexpected error during port allocation: %s", err.Error())
|
|
break
|
|
}
|
|
}
|
|
|
|
if err != nil {
|
|
return job.Error(err)
|
|
}
|
|
|
|
network.PortMappings = append(network.PortMappings, host)
|
|
|
|
out := engine.Env{}
|
|
switch netAddr := host.(type) {
|
|
case *net.TCPAddr:
|
|
out.Set("HostIP", netAddr.IP.String())
|
|
out.SetInt("HostPort", netAddr.Port)
|
|
case *net.UDPAddr:
|
|
out.Set("HostIP", netAddr.IP.String())
|
|
out.SetInt("HostPort", netAddr.Port)
|
|
}
|
|
if _, err := out.WriteTo(job.Stdout); err != nil {
|
|
return job.Error(err)
|
|
}
|
|
|
|
return engine.StatusOK
|
|
}
|
|
|
|
func LinkContainers(job *engine.Job) engine.Status {
|
|
var (
|
|
action = job.Args[0]
|
|
childIP = job.Getenv("ChildIP")
|
|
parentIP = job.Getenv("ParentIP")
|
|
ignoreErrors = job.GetenvBool("IgnoreErrors")
|
|
ports = job.GetenvList("Ports")
|
|
)
|
|
split := func(p string) (string, string) {
|
|
parts := strings.Split(p, "/")
|
|
return parts[0], parts[1]
|
|
}
|
|
|
|
for _, p := range ports {
|
|
port, proto := split(p)
|
|
if output, err := iptables.Raw(action, "FORWARD",
|
|
"-i", bridgeIface, "-o", bridgeIface,
|
|
"-p", proto,
|
|
"-s", parentIP,
|
|
"--dport", port,
|
|
"-d", childIP,
|
|
"-j", "ACCEPT"); !ignoreErrors && err != nil {
|
|
return job.Error(err)
|
|
} else if len(output) != 0 {
|
|
return job.Errorf("Error toggle iptables forward: %s", output)
|
|
}
|
|
|
|
if output, err := iptables.Raw(action, "FORWARD",
|
|
"-i", bridgeIface, "-o", bridgeIface,
|
|
"-p", proto,
|
|
"-s", childIP,
|
|
"--sport", port,
|
|
"-d", parentIP,
|
|
"-j", "ACCEPT"); !ignoreErrors && err != nil {
|
|
return job.Error(err)
|
|
} else if len(output) != 0 {
|
|
return job.Errorf("Error toggle iptables forward: %s", output)
|
|
}
|
|
}
|
|
return engine.StatusOK
|
|
}
|