2013-02-20 20:47:09 -05:00
package docker
import (
2013-02-21 21:33:23 -05:00
"encoding/binary"
2013-02-25 13:45:23 -05:00
"errors"
2013-02-20 21:20:18 -05:00
"fmt"
2013-02-28 14:50:02 -05:00
"log"
2013-02-20 20:47:09 -05:00
"net"
2013-02-28 14:50:02 -05:00
"os/exec"
"strconv"
"strings"
2013-04-05 01:56:12 -04:00
"sync"
2013-02-20 20:47:09 -05:00
)
// Networking defaults.
const (
	// networkBridgeIface is the name of a bridge interface.
	// NOTE(review): not referenced in the visible part of this file;
	// presumably the default bridge passed to newNetworkManager - confirm.
	networkBridgeIface = "lxcbr0"

	// portRangeStart and portRangeEnd bound the external ports that
	// PortAllocator.runFountain cycles through for dynamic allocation.
	portRangeStart = 49153
	portRangeEnd   = 65535
)
2013-02-28 14:50:02 -05:00
// networkRange calculates the first and last IP addresses in an IPv4
// network, i.e. its network address and its broadcast address.
//
// The mask may be in 4-byte form or in the 16-byte form whose first twelve
// bytes are 0xff; in the latter case only the trailing four bytes are used,
// matching what net.IP.Mask does for the first address.
//
// Both results are nil when network.IP is not an IPv4 address or when the
// mask cannot be applied to it.
func networkRange(network *net.IPNet) (net.IP, net.IP) {
	netIP := network.IP.To4()
	if netIP == nil {
		return nil, nil
	}
	// IP.Mask returns nil unless the mask is a usable 4- or 16-byte mask.
	firstIP := netIP.Mask(network.Mask)
	if firstIP == nil {
		return nil, nil
	}
	// Align the mask with the 4-byte address before inverting it.
	mask := network.Mask[len(network.Mask)-net.IPv4len:]
	lastIP := make(net.IP, net.IPv4len)
	for i := range lastIP {
		lastIP[i] = netIP[i] | ^mask[i]
	}
	return firstIP, lastIP
}
2013-02-28 14:50:02 -05:00
// ipToInt packs an IPv4 address into a 32 bit integer, most significant
// byte first. Addresses above 127.255.255.255 come out negative because the
// result is signed.
func ipToInt(ip net.IP) int32 {
	raw := ip.To4()
	packed := binary.BigEndian.Uint32(raw)
	return int32(packed)
}
2013-02-28 14:50:02 -05:00
// intToIp unpacks a 32 bit integer into a 4-byte IP address, most
// significant byte first. It is the inverse of ipToInt.
func intToIp(n int32) net.IP {
	addr := make(net.IP, net.IPv4len)
	binary.BigEndian.PutUint32(addr, uint32(n))
	return addr
}
2013-02-28 14:50:02 -05:00
// networkSize calculates, from a netmask, the number of addresses in the
// network (inverted mask + 1), including the network and broadcast
// addresses.
//
// The mask may be in 4-byte form or in the 16-byte form; for the latter
// only the trailing four bytes are considered. A mask shorter than four
// bytes yields 0.
//
// The result is an int32, so an all-zero mask (/0) wraps around to 0.
func networkSize(mask net.IPMask) int32 {
	if len(mask) < net.IPv4len {
		return 0
	}
	// Use the IPv4 part of a 16-byte mask; a 4-byte mask is unchanged.
	mask = mask[len(mask)-net.IPv4len:]

	m := net.IPv4Mask(0, 0, 0, 0)
	for i := 0; i < net.IPv4len; i++ {
		m[i] = ^mask[i]
	}
	return int32(binary.BigEndian.Uint32(m)) + 1
}
2013-02-28 14:50:02 -05:00
// iptables is a wrapper around the iptables command. It looks the binary up
// in PATH and runs it with args.
//
// It returns an error when the binary cannot be found or when the command
// fails. In the latter case the error carries the underlying failure and
// whatever the command printed, so that the cause can be diagnosed.
func iptables(args ...string) error {
	path, err := exec.LookPath("iptables")
	if err != nil {
		return fmt.Errorf("command not found: iptables")
	}
	if output, err := exec.Command(path, args...).CombinedOutput(); err != nil {
		return fmt.Errorf("iptables failed: iptables %v: %v (%s)",
			strings.Join(args, " "), err, strings.TrimSpace(string(output)))
	}
	return nil
}
// getIfaceAddr returns the IPv4 address of the network interface called
// name.
//
// Only addresses reported as *net.IPNet are considered; other address types
// are skipped instead of triggering a failed type assertion. When the
// interface has several IPv4 addresses the first one is returned and a
// notice is printed to standard output.
//
// An error is returned when the interface cannot be looked up, when its
// addresses cannot be listed, or when it has no IPv4 address.
func getIfaceAddr(name string) (net.Addr, error) {
	iface, err := net.InterfaceByName(name)
	if err != nil {
		return nil, err
	}
	addrs, err := iface.Addrs()
	if err != nil {
		return nil, err
	}
	var addrs4 []net.Addr
	for _, addr := range addrs {
		ipnet, ok := addr.(*net.IPNet)
		if !ok {
			continue
		}
		if ipnet.IP.To4() != nil {
			addrs4 = append(addrs4, addr)
		}
	}
	switch {
	case len(addrs4) == 0:
		return nil, fmt.Errorf("Interface %v has no IP addresses", name)
	case len(addrs4) > 1:
		fmt.Printf("Interface %v has more than 1 IPv4 address. Defaulting to using %v\n",
			name, (addrs4[0].(*net.IPNet)).IP)
	}
	return addrs4[0], nil
}
2013-02-28 14:50:02 -05:00
// PortMapper maps external ports to containers by setting up iptables
// rules. It records every mapping it creates so that the mapping can be
// removed again later.
type PortMapper struct {
	// mapping holds the destination of each mapped external port, keyed by
	// the external port number.
	mapping map[int]net.TCPAddr
}
func ( mapper * PortMapper ) cleanup ( ) error {
// Ignore errors - This could mean the chains were never set up
2013-04-03 18:32:46 -04:00
iptables ( "-t" , "nat" , "-D" , "PREROUTING" , "-m" , "addrtype" , "--dst-type" , "LOCAL" , "-j" , "DOCKER" )
2013-04-04 15:56:37 -04:00
iptables ( "-t" , "nat" , "-D" , "OUTPUT" , "-m" , "addrtype" , "--dst-type" , "LOCAL" , "-j" , "DOCKER" )
2013-04-04 18:16:42 -04:00
// Also cleanup rules created by older versions, or -X might fail.
iptables ( "-t" , "nat" , "-D" , "PREROUTING" , "-j" , "DOCKER" )
iptables ( "-t" , "nat" , "-D" , "OUTPUT" , "-j" , "DOCKER" )
2013-02-28 14:50:02 -05:00
iptables ( "-t" , "nat" , "-F" , "DOCKER" )
iptables ( "-t" , "nat" , "-X" , "DOCKER" )
mapper . mapping = make ( map [ int ] net . TCPAddr )
return nil
}
func ( mapper * PortMapper ) setup ( ) error {
if err := iptables ( "-t" , "nat" , "-N" , "DOCKER" ) ; err != nil {
2013-03-28 15:44:54 -04:00
return fmt . Errorf ( "Failed to create DOCKER chain: %s" , err )
2013-02-20 21:20:18 -05:00
}
2013-04-03 18:32:46 -04:00
if err := iptables ( "-t" , "nat" , "-A" , "PREROUTING" , "-m" , "addrtype" , "--dst-type" , "LOCAL" , "-j" , "DOCKER" ) ; err != nil {
2013-03-28 15:44:54 -04:00
return fmt . Errorf ( "Failed to inject docker in PREROUTING chain: %s" , err )
2013-02-28 14:50:02 -05:00
}
2013-04-04 15:56:37 -04:00
if err := iptables ( "-t" , "nat" , "-A" , "OUTPUT" , "-m" , "addrtype" , "--dst-type" , "LOCAL" , "-j" , "DOCKER" ) ; err != nil {
2013-03-28 15:44:54 -04:00
return fmt . Errorf ( "Failed to inject docker in OUTPUT chain: %s" , err )
2013-03-22 10:06:14 -04:00
}
2013-02-28 14:50:02 -05:00
return nil
}
// iptablesForward applies a DNAT rule to the DOCKER nat chain that sends TCP
// traffic for the external port to dest. rule is the iptables action flag;
// the callers in this file pass "-A" to add the rule and "-D" to delete it.
func (mapper *PortMapper) iptablesForward(rule string, port int, dest net.TCPAddr) error {
	external := strconv.Itoa(port)
	target := net.JoinHostPort(dest.IP.String(), strconv.Itoa(dest.Port))
	return iptables("-t", "nat", rule, "DOCKER",
		"-p", "tcp", "--dport", external,
		"-j", "DNAT", "--to-destination", target)
}
// Map forwards the external port to dest and records the mapping. Nothing
// is recorded when the iptables rule cannot be added; the iptables error is
// returned in that case.
func (mapper *PortMapper) Map(port int, dest net.TCPAddr) error {
	err := mapper.iptablesForward("-A", port, dest)
	if err == nil {
		mapper.mapping[port] = dest
	}
	return err
}
2013-02-25 17:06:22 -05:00
2013-02-28 14:50:02 -05:00
func ( mapper * PortMapper ) Unmap ( port int ) error {
dest , ok := mapper . mapping [ port ]
if ! ok {
return errors . New ( "Port is not mapped" )
}
if err := mapper . iptablesForward ( "-D" , port , dest ) ; err != nil {
return err
2013-02-21 21:33:23 -05:00
}
2013-02-28 14:50:02 -05:00
delete ( mapper . mapping , port )
return nil
}
func newPortMapper ( ) ( * PortMapper , error ) {
mapper := & PortMapper { }
if err := mapper . cleanup ( ) ; err != nil {
2013-02-25 17:06:22 -05:00
return nil , err
2013-02-20 20:47:09 -05:00
}
2013-02-28 14:50:02 -05:00
if err := mapper . setup ( ) ; err != nil {
return nil , err
}
return mapper , nil
2013-02-20 20:47:09 -05:00
}
2013-02-25 13:45:23 -05:00
2013-02-28 14:50:02 -05:00
// PortAllocator automatically allocates and releases networking ports.
type PortAllocator struct {
	// inUse is the set of ports currently allocated.
	inUse map[int]struct{}
	// fountain supplies candidate ports for dynamic allocation; it is fed
	// by runFountain.
	fountain chan int
	// lock is held by Acquire and Release around their accesses to inUse.
	lock sync.Mutex
}
2013-04-05 01:56:12 -04:00
func ( alloc * PortAllocator ) runFountain ( ) {
for {
for port := portRangeStart ; port < portRangeEnd ; port ++ {
alloc . fountain <- port
}
2013-02-25 13:45:23 -05:00
}
}
2013-04-05 01:56:12 -04:00
// FIXME: Release can no longer fail, change its prototype to reflect that.
2013-02-28 14:50:02 -05:00
func ( alloc * PortAllocator ) Release ( port int ) error {
2013-04-05 16:03:24 -04:00
Debugf ( "Releasing %d" , port )
2013-04-05 01:56:12 -04:00
alloc . lock . Lock ( )
delete ( alloc . inUse , port )
alloc . lock . Unlock ( )
return nil
}
func ( alloc * PortAllocator ) Acquire ( port int ) ( int , error ) {
2013-04-05 16:03:24 -04:00
Debugf ( "Acquiring %d" , port )
2013-04-05 01:56:12 -04:00
if port == 0 {
// Allocate a port from the fountain
for port := range alloc . fountain {
if _ , err := alloc . Acquire ( port ) ; err == nil {
return port , nil
}
}
return - 1 , fmt . Errorf ( "Port generator ended unexpectedly" )
2013-02-25 13:45:23 -05:00
}
2013-04-05 01:56:12 -04:00
alloc . lock . Lock ( )
defer alloc . lock . Unlock ( )
if _ , inUse := alloc . inUse [ port ] ; inUse {
return - 1 , fmt . Errorf ( "Port already in use: %d" , port )
}
alloc . inUse [ port ] = struct { } { }
return port , nil
2013-02-25 13:45:23 -05:00
}
2013-04-05 01:56:12 -04:00
func newPortAllocator ( ) ( * PortAllocator , error ) {
allocator := & PortAllocator {
2013-04-05 01:58:01 -04:00
inUse : make ( map [ int ] struct { } ) ,
2013-04-05 16:03:04 -04:00
fountain : make ( chan int ) ,
2013-04-05 01:56:12 -04:00
}
go allocator . runFountain ( )
2013-02-28 14:50:02 -05:00
return allocator , nil
}
// IP allocator: Atomatically allocate and release networking ports
type IPAllocator struct {
2013-03-30 18:32:10 -04:00
network * net . IPNet
queueAlloc chan allocatedIP
queueReleased chan net . IP
inUse map [ int32 ] struct { }
}
// allocatedIP is the result of one allocation attempt, as sent over
// IPAllocator.queueAlloc: an address plus the error of the attempt, if any.
type allocatedIP struct {
	ip  net.IP
	err error
}
2013-03-30 18:32:10 -04:00
func ( alloc * IPAllocator ) run ( ) {
2013-02-28 14:50:02 -05:00
firstIP , _ := networkRange ( alloc . network )
2013-03-30 18:32:10 -04:00
ipNum := ipToInt ( firstIP )
ownIP := ipToInt ( alloc . network . IP )
size := networkSize ( alloc . network . Mask )
pos := int32 ( 1 )
max := size - 2 // -1 for the broadcast address, -1 for the gateway address
for {
var (
newNum int32
inUse bool
)
// Find first unused IP, give up after one whole round
for attempt := int32 ( 0 ) ; attempt < max ; attempt ++ {
newNum = ipNum + pos
pos = pos % max + 1
// The network's IP is never okay to use
if newNum == ownIP {
continue
}
if _ , inUse = alloc . inUse [ newNum ] ; ! inUse {
// We found an unused IP
break
}
2013-02-25 13:45:23 -05:00
}
2013-03-30 18:32:10 -04:00
ip := allocatedIP { ip : intToIp ( newNum ) }
if inUse {
ip . err = errors . New ( "No unallocated IP available" )
2013-02-25 13:45:23 -05:00
}
2013-03-30 18:32:10 -04:00
select {
case alloc . queueAlloc <- ip :
alloc . inUse [ newNum ] = struct { } { }
case released := <- alloc . queueReleased :
r := ipToInt ( released )
delete ( alloc . inUse , r )
if inUse {
// If we couldn't allocate a new IP, the released one
// will be the only free one now, so instantly use it
// next time
pos = r - ipNum
} else {
// Use same IP as last time
if pos == 1 {
pos = max
} else {
pos --
}
}
2013-02-25 13:45:23 -05:00
}
2013-02-28 14:50:02 -05:00
}
}
func ( alloc * IPAllocator ) Acquire ( ) ( net . IP , error ) {
2013-03-30 18:32:10 -04:00
ip := <- alloc . queueAlloc
return ip . ip , ip . err
2013-02-28 14:50:02 -05:00
}
2013-03-30 18:32:10 -04:00
func ( alloc * IPAllocator ) Release ( ip net . IP ) {
alloc . queueReleased <- ip
2013-02-25 13:45:23 -05:00
}
2013-03-30 18:32:10 -04:00
func newIPAllocator ( network * net . IPNet ) * IPAllocator {
2013-02-28 14:50:02 -05:00
alloc := & IPAllocator {
2013-03-30 18:32:10 -04:00
network : network ,
queueAlloc : make ( chan allocatedIP ) ,
queueReleased : make ( chan net . IP ) ,
inUse : make ( map [ int32 ] struct { } ) ,
2013-02-28 14:50:02 -05:00
}
2013-03-30 18:32:10 -04:00
go alloc . run ( )
return alloc
2013-02-28 14:50:02 -05:00
}
// NetworkInterface represents the networking stack of a container.
type NetworkInterface struct {
	// IPNet is the interface's address and netmask.
	IPNet net.IPNet
	// Gateway is the gateway address for the interface.
	Gateway net.IP
	// manager is the NetworkManager whose allocators and port mapper the
	// interface uses.
	manager *NetworkManager
	// extPorts lists the external ports added by AllocatePort; Release
	// unmaps and frees each of them.
	extPorts []int
}
// Allocate an external TCP port and map it to the interface
2013-04-05 01:58:01 -04:00
func ( iface * NetworkInterface ) AllocatePort ( spec string ) ( * Nat , error ) {
nat , err := parseNat ( spec )
2013-02-28 14:50:02 -05:00
if err != nil {
2013-04-05 01:58:01 -04:00
return nil , err
}
// Allocate a random port if Frontend==0
if extPort , err := iface . manager . portAllocator . Acquire ( nat . Frontend ) ; err != nil {
return nil , err
} else {
nat . Frontend = extPort
}
if err := iface . manager . portMapper . Map ( nat . Frontend , net . TCPAddr { IP : iface . IPNet . IP , Port : nat . Backend } ) ; err != nil {
iface . manager . portAllocator . Release ( nat . Frontend )
return nil , err
}
iface . extPorts = append ( iface . extPorts , nat . Frontend )
return nat , nil
}
// Nat describes a port forwarding: traffic for the Frontend (external) port
// is sent to the Backend (internal) port using protocol Proto.
type Nat struct {
	Proto    string
	Frontend int
	Backend  int
}

// parseNat parses a port specification of the form "PORT" or ":PORT".
//
// PORT is the backend port, a decimal number that fits in 16 bits. If spec
// starts with ':', external and internal ports must be the same, so
// Frontend is set to the backend port; this might fail later if the
// requested external port is not available. Otherwise Frontend is left at 0.
// Proto is always "tcp".
//
// An error is returned when the port is missing or not a valid number; an
// empty spec is reported as an error rather than causing a panic.
func parseNat(spec string) (*Nat, error) {
	// HasPrefix is safe on the empty string, unlike indexing spec[0].
	sameFrontend := strings.HasPrefix(spec, ":")
	if sameFrontend {
		spec = spec[1:]
	}

	port, err := strconv.ParseUint(spec, 10, 16)
	if err != nil {
		return nil, err
	}

	nat := &Nat{Proto: "tcp", Backend: int(port)}
	if sameFrontend {
		nat.Frontend = nat.Backend
	}
	return nat, nil
}
// Release: Network cleanup - release all resources
2013-03-30 18:32:10 -04:00
func ( iface * NetworkInterface ) Release ( ) {
2013-02-28 14:50:02 -05:00
for _ , port := range iface . extPorts {
if err := iface . manager . portMapper . Unmap ( port ) ; err != nil {
log . Printf ( "Unable to unmap port %v: %v" , port , err )
}
if err := iface . manager . portAllocator . Release ( port ) ; err != nil {
log . Printf ( "Unable to release port %v: %v" , port , err )
}
}
2013-03-30 18:32:10 -04:00
iface . manager . ipAllocator . Release ( iface . IPNet . IP )
2013-02-28 14:50:02 -05:00
}
// NetworkManager manages a set of network interfaces.
// Only *one* manager per host machine should be used.
type NetworkManager struct {
	// bridgeIface is the name of the bridge interface the manager was
	// created for.
	bridgeIface string
	// bridgeNetwork is the bridge's IPv4 address and netmask.
	bridgeNetwork *net.IPNet

	// ipAllocator hands out addresses within bridgeNetwork.
	ipAllocator *IPAllocator
	// portAllocator hands out external ports.
	portAllocator *PortAllocator
	// portMapper forwards external ports to interface addresses.
	portMapper *PortMapper
}
// Allocate a network interface
func ( manager * NetworkManager ) Allocate ( ) ( * NetworkInterface , error ) {
ip , err := manager . ipAllocator . Acquire ( )
2013-02-25 13:45:23 -05:00
if err != nil {
2013-02-25 17:06:22 -05:00
return nil , err
2013-02-25 13:45:23 -05:00
}
2013-02-25 17:06:22 -05:00
iface := & NetworkInterface {
2013-03-20 09:02:25 -04:00
IPNet : net . IPNet { IP : ip , Mask : manager . bridgeNetwork . Mask } ,
2013-02-28 14:50:02 -05:00
Gateway : manager . bridgeNetwork . IP ,
manager : manager ,
2013-02-25 17:06:22 -05:00
}
return iface , nil
}
2013-02-28 14:50:02 -05:00
func newNetworkManager ( bridgeIface string ) ( * NetworkManager , error ) {
addr , err := getIfaceAddr ( bridgeIface )
if err != nil {
2013-04-04 22:14:10 -04:00
return nil , fmt . Errorf ( "Couldn't find bridge interface %s (%s).\nPlease create it with 'ip link add lxcbr0 type bridge; ip addr add ADDRESS/MASK dev lxcbr0'" , bridgeIface , err )
2013-02-28 14:50:02 -05:00
}
network := addr . ( * net . IPNet )
2013-03-30 18:32:10 -04:00
ipAllocator := newIPAllocator ( network )
2013-02-28 14:50:02 -05:00
2013-04-05 01:56:12 -04:00
portAllocator , err := newPortAllocator ( )
2013-02-28 14:50:02 -05:00
if err != nil {
return nil , err
}
portMapper , err := newPortMapper ( )
2013-03-23 00:43:31 -04:00
if err != nil {
return nil , err
}
2013-02-28 14:50:02 -05:00
manager := & NetworkManager {
bridgeIface : bridgeIface ,
bridgeNetwork : network ,
ipAllocator : ipAllocator ,
portAllocator : portAllocator ,
portMapper : portMapper ,
}
return manager , nil
2013-02-25 13:45:23 -05:00
}