2014-03-20 21:51:28 +00:00
package bridge
2014-01-29 16:59:21 -08:00
import (
"fmt"
2014-05-05 22:51:32 +00:00
"io/ioutil"
"log"
"net"
"strings"
2014-05-29 16:28:06 +04:00
"sync"
2014-05-05 22:51:32 +00:00
2014-06-10 19:58:15 -07:00
"github.com/docker/libcontainer/netlink"
2014-04-17 14:43:01 -07:00
"github.com/dotcloud/docker/daemon/networkdriver"
"github.com/dotcloud/docker/daemon/networkdriver/ipallocator"
"github.com/dotcloud/docker/daemon/networkdriver/portallocator"
"github.com/dotcloud/docker/daemon/networkdriver/portmapper"
2014-01-29 16:59:21 -08:00
"github.com/dotcloud/docker/engine"
2014-03-14 15:07:52 -07:00
"github.com/dotcloud/docker/pkg/iptables"
2014-05-05 22:51:32 +00:00
"github.com/dotcloud/docker/pkg/networkfs/resolvconf"
2014-01-29 16:59:21 -08:00
"github.com/dotcloud/docker/utils"
)
// DefaultNetworkBridge is the bridge interface name InitDriver falls back to
// when the job does not provide a "BridgeIface" value.
const DefaultNetworkBridge = "docker0"
// networkInterface describes the networking state tracked for one container:
// the IP handed out to it and the host-side addresses mapped to its ports.
type networkInterface struct {
	// IP is the address allocated to the container.
	IP net.IP
	// PortMappings holds the host-side addresses that are mapped to the container.
	PortMappings []net.Addr
}
2014-05-29 16:28:06 +04:00
// ifaces is a mutex-guarded registry of network interfaces indexed by a
// string key. Access goes through Set and Get, which take the embedded lock.
type ifaces struct {
	// c maps a key to its interface record; it must be initialised with make
	// before Set is called.
	c map[string]*networkInterface
	sync.Mutex
}
// Set stores n under key while holding the lock, replacing any previous entry.
func (reg *ifaces) Set(key string, n *networkInterface) {
	reg.Lock()
	defer reg.Unlock()
	reg.c[key] = n
}
// Get returns the interface stored under key while holding the lock, or nil
// when no entry exists for that key.
func (reg *ifaces) Get(key string) *networkInterface {
	reg.Lock()
	defer reg.Unlock()
	return reg.c[key]
}
2014-01-29 16:59:21 -08:00
var (
addrs = [ ] string {
// Here we don't follow the convention of using the 1st IP of the range for the gateway.
// This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges.
// In theory this shouldn't matter - in practice there's bound to be a few scripts relying
// on the internal addressing or other stupid things like that.
2014-04-15 17:35:36 -04:00
// They shouldn't, but hey, let's not break them unless we really have to.
2014-01-29 16:59:21 -08:00
"172.17.42.1/16" , // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23
"10.0.42.1/16" , // Don't even try using the entire /8, that's too intrusive
"10.1.42.1/16" ,
"10.42.42.1/16" ,
"172.16.42.1/24" ,
"172.16.43.1/24" ,
"172.16.44.1/24" ,
"10.0.42.1/24" ,
"10.0.43.1/24" ,
"192.168.42.1/24" ,
"192.168.43.1/24" ,
"192.168.44.1/24" ,
}
2014-01-30 14:52:59 -08:00
bridgeIface string
bridgeNetwork * net . IPNet
2014-01-29 16:59:21 -08:00
2014-01-30 14:52:59 -08:00
defaultBindingIP = net . ParseIP ( "0.0.0.0" )
2014-05-29 16:28:06 +04:00
currentInterfaces = ifaces { c : make ( map [ string ] * networkInterface ) }
2014-01-29 16:59:21 -08:00
)
func InitDriver ( job * engine . Job ) engine . Status {
var (
network * net . IPNet
enableIPTables = job . GetenvBool ( "EnableIptables" )
icc = job . GetenvBool ( "InterContainerCommunication" )
ipForward = job . GetenvBool ( "EnableIpForward" )
2014-01-29 18:34:43 -08:00
bridgeIP = job . Getenv ( "BridgeIP" )
2014-01-29 16:59:21 -08:00
)
2014-01-29 18:34:43 -08:00
2014-01-30 14:52:59 -08:00
if defaultIP := job . Getenv ( "DefaultBindingIP" ) ; defaultIP != "" {
defaultBindingIP = net . ParseIP ( defaultIP )
}
2014-01-30 11:25:06 -08:00
2014-01-29 16:59:21 -08:00
bridgeIface = job . Getenv ( "BridgeIface" )
2014-04-08 14:07:02 -04:00
usingDefaultBridge := false
2014-01-29 18:34:43 -08:00
if bridgeIface == "" {
2014-04-08 14:07:02 -04:00
usingDefaultBridge = true
2014-01-29 18:34:43 -08:00
bridgeIface = DefaultNetworkBridge
}
2014-01-29 16:59:21 -08:00
addr , err := networkdriver . GetIfaceAddr ( bridgeIface )
if err != nil {
2014-04-08 14:07:02 -04:00
// If we're not using the default bridge, fail without trying to create it
if ! usingDefaultBridge {
job . Logf ( "bridge not found: %s" , bridgeIface )
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-04-08 14:07:02 -04:00
}
2014-01-29 16:59:21 -08:00
// If the iface is not found, try to create it
2014-01-29 18:34:43 -08:00
job . Logf ( "creating new bridge for %s" , bridgeIface )
if err := createBridge ( bridgeIP ) ; err != nil {
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-01-29 16:59:21 -08:00
}
2014-01-29 18:34:43 -08:00
job . Logf ( "getting iface addr" )
2014-01-29 16:59:21 -08:00
addr , err = networkdriver . GetIfaceAddr ( bridgeIface )
if err != nil {
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-01-29 16:59:21 -08:00
}
network = addr . ( * net . IPNet )
} else {
network = addr . ( * net . IPNet )
2014-03-26 11:51:27 +00:00
// validate that the bridge ip matches the ip specified by BridgeIP
if bridgeIP != "" {
2014-04-28 17:04:56 -07:00
bip , _ , err := net . ParseCIDR ( bridgeIP )
if err != nil {
return job . Error ( err )
}
if ! network . IP . Equal ( bip ) {
return job . Errorf ( "bridge ip (%s) does not match existing bridge configuration %s" , network . IP , bip )
2014-03-26 11:51:27 +00:00
}
}
2014-01-29 16:59:21 -08:00
}
// Configure iptables for link support
if enableIPTables {
if err := setupIPTables ( addr , icc ) ; err != nil {
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-01-29 16:59:21 -08:00
}
}
if ipForward {
// Enable IPv4 forwarding
if err := ioutil . WriteFile ( "/proc/sys/net/ipv4/ip_forward" , [ ] byte { '1' , '\n' } , 0644 ) ; err != nil {
job . Logf ( "WARNING: unable to enable IPv4 forwarding: %s\n" , err )
}
}
// We can always try removing the iptables
if err := iptables . RemoveExistingChain ( "DOCKER" ) ; err != nil {
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-01-29 16:59:21 -08:00
}
if enableIPTables {
chain , err := iptables . NewChain ( "DOCKER" , bridgeIface )
if err != nil {
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-01-29 16:59:21 -08:00
}
portmapper . SetIptablesChain ( chain )
}
bridgeNetwork = network
2014-01-29 18:34:43 -08:00
// https://github.com/dotcloud/docker/issues/2768
job . Eng . Hack_SetGlobalVar ( "httpapi.bridgeIP" , bridgeNetwork . IP )
2014-01-29 16:59:21 -08:00
for name , f := range map [ string ] engine . Handler {
"allocate_interface" : Allocate ,
"release_interface" : Release ,
2014-01-29 18:34:43 -08:00
"allocate_port" : AllocatePort ,
2014-01-30 12:43:49 -08:00
"link" : LinkContainers ,
2014-01-29 16:59:21 -08:00
} {
if err := job . Eng . Register ( name , f ) ; err != nil {
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-01-29 16:59:21 -08:00
}
}
return engine . StatusOK
}
func setupIPTables ( addr net . Addr , icc bool ) error {
// Enable NAT
natArgs := [ ] string { "POSTROUTING" , "-t" , "nat" , "-s" , addr . String ( ) , "!" , "-d" , addr . String ( ) , "-j" , "MASQUERADE" }
if ! iptables . Exists ( natArgs ... ) {
2014-01-31 10:20:03 -08:00
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , natArgs ... ) ... ) ; err != nil {
2014-01-29 16:59:21 -08:00
return fmt . Errorf ( "Unable to enable network bridge NAT: %s" , err )
} else if len ( output ) != 0 {
return fmt . Errorf ( "Error iptables postrouting: %s" , output )
}
}
var (
args = [ ] string { "FORWARD" , "-i" , bridgeIface , "-o" , bridgeIface , "-j" }
acceptArgs = append ( args , "ACCEPT" )
dropArgs = append ( args , "DROP" )
)
if ! icc {
iptables . Raw ( append ( [ ] string { "-D" } , acceptArgs ... ) ... )
if ! iptables . Exists ( dropArgs ... ) {
utils . Debugf ( "Disable inter-container communication" )
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , dropArgs ... ) ... ) ; err != nil {
return fmt . Errorf ( "Unable to prevent intercontainer communication: %s" , err )
} else if len ( output ) != 0 {
return fmt . Errorf ( "Error disabling intercontainer communication: %s" , output )
}
}
} else {
iptables . Raw ( append ( [ ] string { "-D" } , dropArgs ... ) ... )
if ! iptables . Exists ( acceptArgs ... ) {
utils . Debugf ( "Enable inter-container communication" )
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , acceptArgs ... ) ... ) ; err != nil {
return fmt . Errorf ( "Unable to allow intercontainer communication: %s" , err )
} else if len ( output ) != 0 {
return fmt . Errorf ( "Error enabling intercontainer communication: %s" , output )
}
}
}
// Accept all non-intercontainer outgoing packets
outgoingArgs := [ ] string { "FORWARD" , "-i" , bridgeIface , "!" , "-o" , bridgeIface , "-j" , "ACCEPT" }
if ! iptables . Exists ( outgoingArgs ... ) {
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , outgoingArgs ... ) ... ) ; err != nil {
return fmt . Errorf ( "Unable to allow outgoing packets: %s" , err )
} else if len ( output ) != 0 {
return fmt . Errorf ( "Error iptables allow outgoing: %s" , output )
}
}
// Accept incoming packets for existing connections
existingArgs := [ ] string { "FORWARD" , "-o" , bridgeIface , "-m" , "conntrack" , "--ctstate" , "RELATED,ESTABLISHED" , "-j" , "ACCEPT" }
if ! iptables . Exists ( existingArgs ... ) {
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , existingArgs ... ) ... ) ; err != nil {
return fmt . Errorf ( "Unable to allow incoming packets: %s" , err )
} else if len ( output ) != 0 {
return fmt . Errorf ( "Error iptables allow incoming: %s" , output )
}
}
return nil
}
// CreateBridgeIface creates a network bridge interface on the host system with the name `ifaceName`,
// and attempts to configure it with an address which doesn't conflict with any other interface on the host.
// If it can't find an address which doesn't conflict, it will return an error.
func createBridge ( bridgeIP string ) error {
nameservers := [ ] string { }
2014-05-05 22:51:32 +00:00
resolvConf , _ := resolvconf . Get ( )
2014-01-29 16:59:21 -08:00
// we don't check for an error here, because we don't really care
// if we can't read /etc/resolv.conf. So instead we skip the append
// if resolvConf is nil. It either doesn't exist, or we can't read it
// for some reason.
if resolvConf != nil {
2014-05-05 22:51:32 +00:00
nameservers = append ( nameservers , resolvconf . GetNameserversAsCIDR ( resolvConf ) ... )
2014-01-29 16:59:21 -08:00
}
var ifaceAddr string
if len ( bridgeIP ) != 0 {
_ , _ , err := net . ParseCIDR ( bridgeIP )
if err != nil {
return err
}
ifaceAddr = bridgeIP
} else {
for _ , addr := range addrs {
_ , dockerNetwork , err := net . ParseCIDR ( addr )
if err != nil {
return err
}
if err := networkdriver . CheckNameserverOverlaps ( nameservers , dockerNetwork ) ; err == nil {
if err := networkdriver . CheckRouteOverlaps ( dockerNetwork ) ; err == nil {
ifaceAddr = addr
break
} else {
utils . Debugf ( "%s %s" , addr , err )
}
}
}
}
if ifaceAddr == "" {
return fmt . Errorf ( "Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'" , bridgeIface , bridgeIface )
}
utils . Debugf ( "Creating bridge %s with network %s" , bridgeIface , ifaceAddr )
if err := createBridgeIface ( bridgeIface ) ; err != nil {
return err
}
iface , err := net . InterfaceByName ( bridgeIface )
if err != nil {
return err
}
ipAddr , ipNet , err := net . ParseCIDR ( ifaceAddr )
if err != nil {
return err
}
if netlink . NetworkLinkAddIp ( iface , ipAddr , ipNet ) ; err != nil {
return fmt . Errorf ( "Unable to add private network: %s" , err )
}
if err := netlink . NetworkLinkUp ( iface ) ; err != nil {
return fmt . Errorf ( "Unable to start network bridge: %s" , err )
}
return nil
}
func createBridgeIface ( name string ) error {
2014-03-31 21:02:42 +00:00
kv , err := utils . GetKernelVersion ( )
// only set the bridge's mac address if the kernel version is > 3.3
// before that it was not supported
setBridgeMacAddr := err == nil && ( kv . Kernel >= 3 && kv . Major >= 3 )
utils . Debugf ( "setting bridge mac address = %v" , setBridgeMacAddr )
return netlink . CreateBridge ( name , setBridgeMacAddr )
2014-01-29 16:59:21 -08:00
}
// Allocate a network interface
func Allocate ( job * engine . Job ) engine . Status {
2014-01-30 12:02:56 -08:00
var (
ip * net . IP
err error
id = job . Args [ 0 ]
requestedIP = net . ParseIP ( job . Getenv ( "RequestedIP" ) )
)
2014-01-29 16:59:21 -08:00
2014-01-30 12:02:56 -08:00
if requestedIP != nil {
ip , err = ipallocator . RequestIP ( bridgeNetwork , & requestedIP )
} else {
ip , err = ipallocator . RequestIP ( bridgeNetwork , nil )
}
2014-01-29 16:59:21 -08:00
if err != nil {
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-01-29 16:59:21 -08:00
}
out := engine . Env { }
2014-01-30 11:25:06 -08:00
out . Set ( "IP" , ip . String ( ) )
out . Set ( "Mask" , bridgeNetwork . Mask . String ( ) )
out . Set ( "Gateway" , bridgeNetwork . IP . String ( ) )
2014-01-29 18:34:43 -08:00
out . Set ( "Bridge" , bridgeIface )
size , _ := bridgeNetwork . Mask . Size ( )
out . SetInt ( "IPPrefixLen" , size )
2014-01-29 16:59:21 -08:00
2014-05-29 16:28:06 +04:00
currentInterfaces . Set ( id , & networkInterface {
2014-01-29 16:59:21 -08:00
IP : * ip ,
2014-05-29 16:28:06 +04:00
} )
2014-01-29 16:59:21 -08:00
out . WriteTo ( job . Stdout )
return engine . StatusOK
}
// release an interface for a select ip
func Release ( job * engine . Job ) engine . Status {
var (
id = job . Args [ 0 ]
2014-05-29 16:28:06 +04:00
containerInterface = currentInterfaces . Get ( id )
2014-01-29 16:59:21 -08:00
ip net . IP
port int
proto string
)
2014-02-06 03:18:12 -08:00
if containerInterface == nil {
return job . Errorf ( "No network information to release for %s" , id )
}
2014-01-29 16:59:21 -08:00
for _ , nat := range containerInterface . PortMappings {
if err := portmapper . Unmap ( nat ) ; err != nil {
log . Printf ( "Unable to unmap port %s: %s" , nat , err )
}
// this is host mappings
switch a := nat . ( type ) {
case * net . TCPAddr :
proto = "tcp"
ip = a . IP
port = a . Port
case * net . UDPAddr :
proto = "udp"
ip = a . IP
port = a . Port
}
if err := portallocator . ReleasePort ( ip , proto , port ) ; err != nil {
log . Printf ( "Unable to release port %s" , nat )
}
}
if err := ipallocator . ReleaseIP ( bridgeNetwork , & containerInterface . IP ) ; err != nil {
log . Printf ( "Unable to release ip %s\n" , err )
}
return engine . StatusOK
}
// Allocate an external port and map it to the interface
func AllocatePort ( job * engine . Job ) engine . Status {
var (
2014-01-30 14:52:59 -08:00
err error
2014-01-29 16:59:21 -08:00
ip = defaultBindingIP
id = job . Args [ 0 ]
hostIP = job . Getenv ( "HostIP" )
2014-05-20 21:19:55 -07:00
origHostPort = job . GetenvInt ( "HostPort" )
2014-01-29 16:59:21 -08:00
containerPort = job . GetenvInt ( "ContainerPort" )
proto = job . Getenv ( "Proto" )
2014-05-29 16:28:06 +04:00
network = currentInterfaces . Get ( id )
2014-01-29 16:59:21 -08:00
)
if hostIP != "" {
ip = net . ParseIP ( hostIP )
}
var (
2014-05-20 21:19:55 -07:00
hostPort int
2014-01-29 16:59:21 -08:00
container net . Addr
host net . Addr
)
2014-05-20 21:19:55 -07:00
/ *
Try up to 10 times to get a port that ' s not already allocated .
In the event of failure to bind , return the error that portmapper . Map
yields .
* /
for i := 0 ; i < 10 ; i ++ {
// host ip, proto, and host port
hostPort , err = portallocator . RequestPort ( ip , proto , origHostPort )
if err != nil {
return job . Error ( err )
}
if proto == "tcp" {
host = & net . TCPAddr { IP : ip , Port : hostPort }
container = & net . TCPAddr { IP : network . IP , Port : containerPort }
} else {
host = & net . UDPAddr { IP : ip , Port : hostPort }
container = & net . UDPAddr { IP : network . IP , Port : containerPort }
}
if err = portmapper . Map ( container , ip , hostPort ) ; err == nil {
break
}
job . Logf ( "Failed to bind %s:%d for container address %s:%d. Trying another port." , ip . String ( ) , hostPort , network . IP . String ( ) , containerPort )
2014-01-29 16:59:21 -08:00
}
2014-05-20 21:19:55 -07:00
if err != nil {
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-01-29 16:59:21 -08:00
}
2014-05-20 21:19:55 -07:00
2014-01-29 16:59:21 -08:00
network . PortMappings = append ( network . PortMappings , host )
2014-01-30 14:52:59 -08:00
out := engine . Env { }
out . Set ( "HostIP" , ip . String ( ) )
out . SetInt ( "HostPort" , hostPort )
if _ , err := out . WriteTo ( job . Stdout ) ; err != nil {
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-01-30 14:52:59 -08:00
}
2014-01-29 16:59:21 -08:00
return engine . StatusOK
}
2014-01-30 12:43:49 -08:00
func LinkContainers ( job * engine . Job ) engine . Status {
var (
action = job . Args [ 0 ]
childIP = job . Getenv ( "ChildIP" )
parentIP = job . Getenv ( "ParentIP" )
ignoreErrors = job . GetenvBool ( "IgnoreErrors" )
ports = job . GetenvList ( "Ports" )
)
split := func ( p string ) ( string , string ) {
parts := strings . Split ( p , "/" )
return parts [ 0 ] , parts [ 1 ]
}
for _ , p := range ports {
port , proto := split ( p )
if output , err := iptables . Raw ( action , "FORWARD" ,
"-i" , bridgeIface , "-o" , bridgeIface ,
"-p" , proto ,
"-s" , parentIP ,
"--dport" , port ,
"-d" , childIP ,
"-j" , "ACCEPT" ) ; ! ignoreErrors && err != nil {
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-01-30 12:43:49 -08:00
} else if len ( output ) != 0 {
2014-04-10 11:15:56 -04:00
return job . Errorf ( "Error toggle iptables forward: %s" , output )
2014-01-30 12:43:49 -08:00
}
2014-02-09 01:43:46 -08:00
if output , err := iptables . Raw ( action , "FORWARD" ,
"-i" , bridgeIface , "-o" , bridgeIface ,
"-p" , proto ,
"-s" , childIP ,
"--sport" , port ,
"-d" , parentIP ,
"-j" , "ACCEPT" ) ; ! ignoreErrors && err != nil {
2014-04-10 11:15:56 -04:00
return job . Error ( err )
2014-02-09 01:43:46 -08:00
} else if len ( output ) != 0 {
2014-04-10 11:15:56 -04:00
return job . Errorf ( "Error toggle iptables forward: %s" , output )
2014-02-09 01:43:46 -08:00
}
2014-01-30 12:43:49 -08:00
}
return engine . StatusOK
}