2014-01-29 16:59:21 -08:00
package lxc
import (
"fmt"
"github.com/dotcloud/docker/engine"
"github.com/dotcloud/docker/networkdriver"
"github.com/dotcloud/docker/networkdriver/ipallocator"
"github.com/dotcloud/docker/networkdriver/portallocator"
"github.com/dotcloud/docker/networkdriver/portmapper"
"github.com/dotcloud/docker/pkg/iptables"
"github.com/dotcloud/docker/pkg/netlink"
"github.com/dotcloud/docker/utils"
"io/ioutil"
"log"
"net"
2014-01-30 12:43:49 -08:00
"strings"
2014-01-29 16:59:21 -08:00
"syscall"
"unsafe"
)
const (
	// DefaultNetworkBridge is the bridge interface name used when the
	// "BridgeIface" job setting is empty.
	DefaultNetworkBridge = "docker0"

	// siocBRADDBR is the ioctl request number issued by createBridgeIface
	// to create a bridge device.
	siocBRADDBR = 0x89a0
)
// networkInterface records the networking state tracked for one container.
type networkInterface struct {
	// IP is the address allocated to the container on the bridge network.
	IP net.IP
	// PortMappings holds the host-side addresses that have been mapped to
	// this container.
	PortMappings []net.Addr
}
var (
addrs = [ ] string {
// Here we don't follow the convention of using the 1st IP of the range for the gateway.
// This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges.
// In theory this shouldn't matter - in practice there's bound to be a few scripts relying
// on the internal addressing or other stupid things like that.
// The shouldn't, but hey, let's not break them unless we really have to.
"172.17.42.1/16" , // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23
"10.0.42.1/16" , // Don't even try using the entire /8, that's too intrusive
"10.1.42.1/16" ,
"10.42.42.1/16" ,
"172.16.42.1/24" ,
"172.16.43.1/24" ,
"172.16.44.1/24" ,
"10.0.42.1/24" ,
"10.0.43.1/24" ,
"192.168.42.1/24" ,
"192.168.43.1/24" ,
"192.168.44.1/24" ,
}
2014-01-30 14:52:59 -08:00
bridgeIface string
bridgeNetwork * net . IPNet
2014-01-29 16:59:21 -08:00
2014-01-30 14:52:59 -08:00
defaultBindingIP = net . ParseIP ( "0.0.0.0" )
2014-01-29 16:59:21 -08:00
currentInterfaces = make ( map [ string ] * networkInterface )
)
// init registers InitDriver with the engine under the name
// "init_networkdriver" and panics if the registration fails.
func init() {
	err := engine.Register("init_networkdriver", InitDriver)
	if err != nil {
		panic(err)
	}
}
func InitDriver ( job * engine . Job ) engine . Status {
var (
network * net . IPNet
enableIPTables = job . GetenvBool ( "EnableIptables" )
icc = job . GetenvBool ( "InterContainerCommunication" )
ipForward = job . GetenvBool ( "EnableIpForward" )
2014-01-29 18:34:43 -08:00
bridgeIP = job . Getenv ( "BridgeIP" )
2014-01-29 16:59:21 -08:00
)
2014-01-29 18:34:43 -08:00
2014-01-30 14:52:59 -08:00
if defaultIP := job . Getenv ( "DefaultBindingIP" ) ; defaultIP != "" {
defaultBindingIP = net . ParseIP ( defaultIP )
}
2014-01-30 11:25:06 -08:00
2014-01-29 16:59:21 -08:00
bridgeIface = job . Getenv ( "BridgeIface" )
2014-01-29 18:34:43 -08:00
if bridgeIface == "" {
bridgeIface = DefaultNetworkBridge
}
2014-01-29 16:59:21 -08:00
addr , err := networkdriver . GetIfaceAddr ( bridgeIface )
if err != nil {
// If the iface is not found, try to create it
2014-01-29 18:34:43 -08:00
job . Logf ( "creating new bridge for %s" , bridgeIface )
if err := createBridge ( bridgeIP ) ; err != nil {
2014-01-29 16:59:21 -08:00
job . Error ( err )
return engine . StatusErr
}
2014-01-29 18:34:43 -08:00
job . Logf ( "getting iface addr" )
2014-01-29 16:59:21 -08:00
addr , err = networkdriver . GetIfaceAddr ( bridgeIface )
if err != nil {
job . Error ( err )
return engine . StatusErr
}
network = addr . ( * net . IPNet )
} else {
network = addr . ( * net . IPNet )
}
// Configure iptables for link support
if enableIPTables {
if err := setupIPTables ( addr , icc ) ; err != nil {
job . Error ( err )
return engine . StatusErr
}
}
if ipForward {
// Enable IPv4 forwarding
if err := ioutil . WriteFile ( "/proc/sys/net/ipv4/ip_forward" , [ ] byte { '1' , '\n' } , 0644 ) ; err != nil {
job . Logf ( "WARNING: unable to enable IPv4 forwarding: %s\n" , err )
}
}
// We can always try removing the iptables
if err := iptables . RemoveExistingChain ( "DOCKER" ) ; err != nil {
job . Error ( err )
return engine . StatusErr
}
if enableIPTables {
chain , err := iptables . NewChain ( "DOCKER" , bridgeIface )
if err != nil {
job . Error ( err )
return engine . StatusErr
}
portmapper . SetIptablesChain ( chain )
}
bridgeNetwork = network
2014-01-29 18:34:43 -08:00
// https://github.com/dotcloud/docker/issues/2768
job . Eng . Hack_SetGlobalVar ( "httpapi.bridgeIP" , bridgeNetwork . IP )
2014-01-29 16:59:21 -08:00
for name , f := range map [ string ] engine . Handler {
"allocate_interface" : Allocate ,
"release_interface" : Release ,
2014-01-29 18:34:43 -08:00
"allocate_port" : AllocatePort ,
2014-01-30 12:43:49 -08:00
"link" : LinkContainers ,
2014-01-29 16:59:21 -08:00
} {
if err := job . Eng . Register ( name , f ) ; err != nil {
job . Error ( err )
return engine . StatusErr
}
}
return engine . StatusOK
}
func setupIPTables ( addr net . Addr , icc bool ) error {
// Enable NAT
natArgs := [ ] string { "POSTROUTING" , "-t" , "nat" , "-s" , addr . String ( ) , "!" , "-d" , addr . String ( ) , "-j" , "MASQUERADE" }
if ! iptables . Exists ( natArgs ... ) {
2014-01-31 10:20:03 -08:00
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , natArgs ... ) ... ) ; err != nil {
2014-01-29 16:59:21 -08:00
return fmt . Errorf ( "Unable to enable network bridge NAT: %s" , err )
} else if len ( output ) != 0 {
return fmt . Errorf ( "Error iptables postrouting: %s" , output )
}
}
var (
args = [ ] string { "FORWARD" , "-i" , bridgeIface , "-o" , bridgeIface , "-j" }
acceptArgs = append ( args , "ACCEPT" )
dropArgs = append ( args , "DROP" )
)
if ! icc {
iptables . Raw ( append ( [ ] string { "-D" } , acceptArgs ... ) ... )
if ! iptables . Exists ( dropArgs ... ) {
utils . Debugf ( "Disable inter-container communication" )
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , dropArgs ... ) ... ) ; err != nil {
return fmt . Errorf ( "Unable to prevent intercontainer communication: %s" , err )
} else if len ( output ) != 0 {
return fmt . Errorf ( "Error disabling intercontainer communication: %s" , output )
}
}
} else {
iptables . Raw ( append ( [ ] string { "-D" } , dropArgs ... ) ... )
if ! iptables . Exists ( acceptArgs ... ) {
utils . Debugf ( "Enable inter-container communication" )
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , acceptArgs ... ) ... ) ; err != nil {
return fmt . Errorf ( "Unable to allow intercontainer communication: %s" , err )
} else if len ( output ) != 0 {
return fmt . Errorf ( "Error enabling intercontainer communication: %s" , output )
}
}
}
// Accept all non-intercontainer outgoing packets
outgoingArgs := [ ] string { "FORWARD" , "-i" , bridgeIface , "!" , "-o" , bridgeIface , "-j" , "ACCEPT" }
if ! iptables . Exists ( outgoingArgs ... ) {
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , outgoingArgs ... ) ... ) ; err != nil {
return fmt . Errorf ( "Unable to allow outgoing packets: %s" , err )
} else if len ( output ) != 0 {
return fmt . Errorf ( "Error iptables allow outgoing: %s" , output )
}
}
// Accept incoming packets for existing connections
existingArgs := [ ] string { "FORWARD" , "-o" , bridgeIface , "-m" , "conntrack" , "--ctstate" , "RELATED,ESTABLISHED" , "-j" , "ACCEPT" }
if ! iptables . Exists ( existingArgs ... ) {
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , existingArgs ... ) ... ) ; err != nil {
return fmt . Errorf ( "Unable to allow incoming packets: %s" , err )
} else if len ( output ) != 0 {
return fmt . Errorf ( "Error iptables allow incoming: %s" , output )
}
}
return nil
}
// createBridge creates the bridge interface named by the package-level
// bridgeIface, assigns it an address and brings it up.
//
// When bridgeIP is non-empty it must be in CIDR notation and is used as is.
// Otherwise the first entry of addrs that overlaps neither the host's
// nameservers nor its existing routes is chosen; if no entry qualifies an
// error is returned.
func createBridge(bridgeIP string) error {
	nameservers := []string{}
	resolvConf, _ := utils.GetResolvConf()
	// The error is deliberately ignored: we don't really care if
	// /etc/resolv.conf can't be read. resolvConf is nil when the file
	// doesn't exist or is unreadable, in which case the append is skipped.
	if resolvConf != nil {
		nameservers = append(nameservers, utils.GetNameserversAsCIDR(resolvConf)...)
	}

	var ifaceAddr string
	if len(bridgeIP) != 0 {
		// Only validate the format here; the value is parsed again below.
		if _, _, err := net.ParseCIDR(bridgeIP); err != nil {
			return err
		}
		ifaceAddr = bridgeIP
	} else {
		for _, addr := range addrs {
			_, dockerNetwork, err := net.ParseCIDR(addr)
			if err != nil {
				return err
			}
			if err := networkdriver.CheckNameserverOverlaps(nameservers, dockerNetwork); err != nil {
				continue
			}
			if err := networkdriver.CheckRouteOverlaps(dockerNetwork); err != nil {
				utils.Debugf("%s %s", addr, err)
				continue
			}
			ifaceAddr = addr
			break
		}
	}

	if ifaceAddr == "" {
		return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", bridgeIface, bridgeIface)
	}
	utils.Debugf("Creating bridge %s with network %s", bridgeIface, ifaceAddr)

	if err := createBridgeIface(bridgeIface); err != nil {
		return err
	}

	iface, err := net.InterfaceByName(bridgeIface)
	if err != nil {
		return err
	}

	ipAddr, ipNet, err := net.ParseCIDR(ifaceAddr)
	if err != nil {
		return err
	}

	// The result must be bound to err here: without the assignment the
	// condition tests the (nil) error left over from ParseCIDR and a failure
	// to assign the address goes unnoticed.
	if err := netlink.NetworkLinkAddIp(iface, ipAddr, ipNet); err != nil {
		return fmt.Errorf("Unable to add private network: %s", err)
	}
	if err := netlink.NetworkLinkUp(iface); err != nil {
		return fmt.Errorf("Unable to start network bridge: %s", err)
	}
	return nil
}
// createBridgeIface creates the bridge device called name by issuing the
// siocBRADDBR ioctl on a freshly opened socket. This is more
// backward-compatible than netlink.NetworkLinkAdd and works on RHEL 6.
//
// An AF_INET6 socket is tried first; if that fails an AF_INET socket is used
// instead.
func createBridgeIface(name string) error {
	fd, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_STREAM, syscall.IPPROTO_IP)
	if err != nil {
		utils.Debugf("Bridge socket creation failed IPv6 probably not enabled: %v", err)
		if fd, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_IP); err != nil {
			return fmt.Errorf("Error creating bridge creation socket: %s", err)
		}
	}
	defer syscall.Close(fd)

	namePtr, err := syscall.BytePtrFromString(name)
	if err != nil {
		return fmt.Errorf("Error converting bridge name %s to byte array: %s", name, err)
	}

	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), siocBRADDBR, uintptr(unsafe.Pointer(namePtr)))
	if errno != 0 {
		return fmt.Errorf("Error creating bridge: %s", errno)
	}
	return nil
}
// Allocate a network interface
func Allocate ( job * engine . Job ) engine . Status {
2014-01-30 12:02:56 -08:00
var (
ip * net . IP
err error
id = job . Args [ 0 ]
requestedIP = net . ParseIP ( job . Getenv ( "RequestedIP" ) )
)
2014-01-29 16:59:21 -08:00
2014-01-30 12:02:56 -08:00
if requestedIP != nil {
ip , err = ipallocator . RequestIP ( bridgeNetwork , & requestedIP )
} else {
ip , err = ipallocator . RequestIP ( bridgeNetwork , nil )
}
2014-01-29 16:59:21 -08:00
if err != nil {
job . Error ( err )
return engine . StatusErr
}
out := engine . Env { }
2014-01-30 11:25:06 -08:00
out . Set ( "IP" , ip . String ( ) )
out . Set ( "Mask" , bridgeNetwork . Mask . String ( ) )
out . Set ( "Gateway" , bridgeNetwork . IP . String ( ) )
2014-01-29 18:34:43 -08:00
out . Set ( "Bridge" , bridgeIface )
size , _ := bridgeNetwork . Mask . Size ( )
out . SetInt ( "IPPrefixLen" , size )
2014-01-29 16:59:21 -08:00
currentInterfaces [ id ] = & networkInterface {
IP : * ip ,
}
out . WriteTo ( job . Stdout )
return engine . StatusOK
}
// release an interface for a select ip
func Release ( job * engine . Job ) engine . Status {
var (
id = job . Args [ 0 ]
containerInterface = currentInterfaces [ id ]
ip net . IP
port int
proto string
)
2014-02-06 03:18:12 -08:00
if containerInterface == nil {
return job . Errorf ( "No network information to release for %s" , id )
}
2014-01-29 16:59:21 -08:00
for _ , nat := range containerInterface . PortMappings {
if err := portmapper . Unmap ( nat ) ; err != nil {
log . Printf ( "Unable to unmap port %s: %s" , nat , err )
}
// this is host mappings
switch a := nat . ( type ) {
case * net . TCPAddr :
proto = "tcp"
ip = a . IP
port = a . Port
case * net . UDPAddr :
proto = "udp"
ip = a . IP
port = a . Port
}
if err := portallocator . ReleasePort ( ip , proto , port ) ; err != nil {
log . Printf ( "Unable to release port %s" , nat )
}
}
if err := ipallocator . ReleaseIP ( bridgeNetwork , & containerInterface . IP ) ; err != nil {
log . Printf ( "Unable to release ip %s\n" , err )
}
return engine . StatusOK
}
// Allocate an external port and map it to the interface
func AllocatePort ( job * engine . Job ) engine . Status {
var (
2014-01-30 14:52:59 -08:00
err error
2014-01-29 16:59:21 -08:00
ip = defaultBindingIP
id = job . Args [ 0 ]
hostIP = job . Getenv ( "HostIP" )
hostPort = job . GetenvInt ( "HostPort" )
containerPort = job . GetenvInt ( "ContainerPort" )
proto = job . Getenv ( "Proto" )
network = currentInterfaces [ id ]
)
if hostIP != "" {
ip = net . ParseIP ( hostIP )
}
// host ip, proto, and host port
2014-01-30 14:52:59 -08:00
hostPort , err = portallocator . RequestPort ( ip , proto , hostPort )
2014-01-29 16:59:21 -08:00
if err != nil {
job . Error ( err )
return engine . StatusErr
}
var (
container net . Addr
host net . Addr
)
if proto == "tcp" {
host = & net . TCPAddr { IP : ip , Port : hostPort }
container = & net . TCPAddr { IP : network . IP , Port : containerPort }
} else {
host = & net . UDPAddr { IP : ip , Port : hostPort }
container = & net . UDPAddr { IP : network . IP , Port : containerPort }
}
if err := portmapper . Map ( container , ip , hostPort ) ; err != nil {
portallocator . ReleasePort ( ip , proto , hostPort )
job . Error ( err )
return engine . StatusErr
}
network . PortMappings = append ( network . PortMappings , host )
2014-01-30 14:52:59 -08:00
out := engine . Env { }
out . Set ( "HostIP" , ip . String ( ) )
out . SetInt ( "HostPort" , hostPort )
if _ , err := out . WriteTo ( job . Stdout ) ; err != nil {
job . Error ( err )
return engine . StatusErr
}
2014-01-29 16:59:21 -08:00
return engine . StatusOK
}
2014-01-30 12:43:49 -08:00
func LinkContainers ( job * engine . Job ) engine . Status {
var (
action = job . Args [ 0 ]
childIP = job . Getenv ( "ChildIP" )
parentIP = job . Getenv ( "ParentIP" )
ignoreErrors = job . GetenvBool ( "IgnoreErrors" )
ports = job . GetenvList ( "Ports" )
)
split := func ( p string ) ( string , string ) {
parts := strings . Split ( p , "/" )
return parts [ 0 ] , parts [ 1 ]
}
for _ , p := range ports {
port , proto := split ( p )
if output , err := iptables . Raw ( action , "FORWARD" ,
"-i" , bridgeIface , "-o" , bridgeIface ,
"-p" , proto ,
"-s" , parentIP ,
"--dport" , port ,
"-d" , childIP ,
"-j" , "ACCEPT" ) ; ! ignoreErrors && err != nil {
job . Error ( err )
return engine . StatusErr
} else if len ( output ) != 0 {
job . Errorf ( "Error toggle iptables forward: %s" , output )
return engine . StatusErr
}
2014-02-09 01:43:46 -08:00
if output , err := iptables . Raw ( action , "FORWARD" ,
"-i" , bridgeIface , "-o" , bridgeIface ,
"-p" , proto ,
"-s" , childIP ,
"--sport" , port ,
"-d" , parentIP ,
"-j" , "ACCEPT" ) ; ! ignoreErrors && err != nil {
job . Error ( err )
return engine . StatusErr
} else if len ( output ) != 0 {
job . Errorf ( "Error toggle iptables forward: %s" , output )
return engine . StatusErr
}
2014-01-30 12:43:49 -08:00
}
return engine . StatusOK
}