2013-02-21 01:47:09 +00:00
package docker
import (
2013-02-21 02:20:18 +00:00
"fmt"
2014-01-23 22:39:10 +00:00
"github.com/dotcloud/docker/networkdriver"
2014-01-23 09:31:38 +00:00
"github.com/dotcloud/docker/networkdriver/ipallocator"
2014-01-23 15:46:42 +00:00
"github.com/dotcloud/docker/networkdriver/portallocator"
2014-01-28 23:42:46 +00:00
"github.com/dotcloud/docker/networkdriver/portmapper"
2013-12-23 23:36:58 +00:00
"github.com/dotcloud/docker/pkg/iptables"
2013-12-23 23:39:39 +00:00
"github.com/dotcloud/docker/pkg/netlink"
2013-05-14 22:37:35 +00:00
"github.com/dotcloud/docker/utils"
2014-01-28 04:35:05 +00:00
"io/ioutil"
2013-02-28 19:50:02 +00:00
"log"
2013-02-21 01:47:09 +00:00
"net"
2013-02-28 19:50:02 +00:00
"strconv"
2013-12-02 17:03:21 +00:00
"syscall"
"unsafe"
2013-02-21 01:47:09 +00:00
)
// Defaults and sentinel values used when configuring the bridge network.
const (
2013-04-05 21:16:19 +00:00
// DefaultNetworkBridge is presumably the bridge interface name used when none
// is configured; it is not referenced in this part of the file — TODO confirm.
DefaultNetworkBridge = "docker0"
2013-07-22 00:49:09 +00:00
// DisableNetworkBridge is the BridgeIface value for which newNetworkManager
// returns a disabled manager instead of setting up a bridge.
DisableNetworkBridge = "none"
2013-12-19 23:16:54 +00:00
// DefaultNetworkMtu is not referenced in this part of the file; presumably the
// default MTU for container interfaces — TODO confirm.
DefaultNetworkMtu = 1500
2013-12-02 17:03:21 +00:00
// siocBRADDBR is the ioctl request number createBridgeIface issues to create
// a bridge device.
siocBRADDBR = 0x89a0
2013-02-21 01:47:09 +00:00
)
2013-07-22 19:06:24 +00:00
// CreateBridgeIface creates a network bridge interface on the host system with the name `ifaceName`,
// and attempts to configure it with an address which doesn't conflict with any other interface on the host.
// If it can't find an address which doesn't conflict, it will return an error.
2013-10-05 02:25:15 +00:00
func CreateBridgeIface ( config * DaemonConfig ) error {
2013-07-22 19:06:24 +00:00
addrs := [ ] string {
// Here we don't follow the convention of using the 1st IP of the range for the gateway.
// This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges.
// In theory this shouldn't matter - in practice there's bound to be a few scripts relying
// on the internal addressing or other stupid things like that.
// The shouldn't, but hey, let's not break them unless we really have to.
2013-08-07 00:24:10 +00:00
"172.17.42.1/16" , // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23
"10.0.42.1/16" , // Don't even try using the entire /8, that's too intrusive
2013-07-22 19:06:24 +00:00
"10.1.42.1/16" ,
"10.42.42.1/16" ,
"172.16.42.1/24" ,
"172.16.43.1/24" ,
"172.16.44.1/24" ,
"10.0.42.1/24" ,
"10.0.43.1/24" ,
"192.168.42.1/24" ,
"192.168.43.1/24" ,
"192.168.44.1/24" ,
}
2013-04-03 22:57:57 +00:00
2013-08-18 03:49:33 +00:00
nameservers := [ ] string { }
resolvConf , _ := utils . GetResolvConf ( )
// we don't check for an error here, because we don't really care
// if we can't read /etc/resolv.conf. So instead we skip the append
// if resolvConf is nil. It either doesn't exist, or we can't read it
// for some reason.
if resolvConf != nil {
nameservers = append ( nameservers , utils . GetNameserversAsCIDR ( resolvConf ) ... )
}
2013-04-03 22:57:57 +00:00
var ifaceAddr string
2013-12-13 15:47:19 +00:00
if len ( config . BridgeIp ) != 0 {
2014-01-23 22:39:10 +00:00
_ , _ , err := net . ParseCIDR ( config . BridgeIp )
2013-04-03 22:57:57 +00:00
if err != nil {
return err
}
2013-12-13 15:47:19 +00:00
ifaceAddr = config . BridgeIp
} else {
for _ , addr := range addrs {
_ , dockerNetwork , err := net . ParseCIDR ( addr )
if err != nil {
return err
}
2014-01-23 22:39:10 +00:00
if err := networkdriver . CheckNameserverOverlaps ( nameservers , dockerNetwork ) ; err == nil {
if err := networkdriver . CheckRouteOverlaps ( dockerNetwork ) ; err == nil {
ifaceAddr = addr
break
} else {
utils . Debugf ( "%s %s" , addr , err )
}
2013-08-18 03:49:33 +00:00
}
2013-04-03 22:57:57 +00:00
}
}
2014-01-23 09:31:38 +00:00
2013-04-03 22:57:57 +00:00
if ifaceAddr == "" {
2013-10-05 02:25:15 +00:00
return fmt . Errorf ( "Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'" , config . BridgeIface , config . BridgeIface )
2013-04-03 22:57:57 +00:00
}
2013-10-05 02:25:15 +00:00
utils . Debugf ( "Creating bridge %s with network %s" , config . BridgeIface , ifaceAddr )
2013-04-03 22:57:57 +00:00
2013-12-02 17:03:21 +00:00
if err := createBridgeIface ( config . BridgeIface ) ; err != nil {
return err
2013-04-03 22:57:57 +00:00
}
2013-09-19 08:57:11 +00:00
iface , err := net . InterfaceByName ( config . BridgeIface )
if err != nil {
return err
}
ipAddr , ipNet , err := net . ParseCIDR ( ifaceAddr )
if err != nil {
return err
}
if netlink . NetworkLinkAddIp ( iface , ipAddr , ipNet ) ; err != nil {
return fmt . Errorf ( "Unable to add private network: %s" , err )
2013-04-03 22:57:57 +00:00
}
2013-09-19 08:57:11 +00:00
if err := netlink . NetworkLinkUp ( iface ) ; err != nil {
return fmt . Errorf ( "Unable to start network bridge: %s" , err )
2013-04-03 22:57:57 +00:00
}
2013-10-10 20:48:22 +00:00
2013-04-03 22:57:57 +00:00
return nil
}
2013-12-02 17:03:21 +00:00
// Create the actual bridge device. This is more backward-compatible than
// netlink.NetworkLinkAdd and works on RHEL 6.
func createBridgeIface ( name string ) error {
s , err := syscall . Socket ( syscall . AF_INET6 , syscall . SOCK_STREAM , syscall . IPPROTO_IP )
if err != nil {
2013-12-13 18:39:49 +00:00
utils . Debugf ( "Bridge socket creation failed IPv6 probably not enabled: %v" , err )
s , err = syscall . Socket ( syscall . AF_INET , syscall . SOCK_STREAM , syscall . IPPROTO_IP )
if err != nil {
return fmt . Errorf ( "Error creating bridge creation socket: %s" , err )
}
2013-12-02 17:03:21 +00:00
}
defer syscall . Close ( s )
nameBytePtr , err := syscall . BytePtrFromString ( name )
if err != nil {
return fmt . Errorf ( "Error converting bridge name %s to byte array: %s" , name , err )
}
if _ , _ , err := syscall . Syscall ( syscall . SYS_IOCTL , uintptr ( s ) , siocBRADDBR , uintptr ( unsafe . Pointer ( nameBytePtr ) ) ) ; err != 0 {
return fmt . Errorf ( "Error creating bridge: %s" , err )
}
return nil
}
2013-02-28 19:50:02 +00:00
// getIfaceAddr returns the first IPv4 address configured on the network interface name.
//
// It returns an error if the interface cannot be looked up, if its addresses cannot be
// listed, or if it has no IPv4 address. When several IPv4 addresses are configured, a
// notice is printed to standard output and the first one is returned. A non-nil result
// always has the dynamic type *net.IPNet.
func getIfaceAddr(name string) (net.Addr, error) {
	iface, err := net.InterfaceByName(name)
	if err != nil {
		return nil, err
	}
	addrs, err := iface.Addrs()
	if err != nil {
		return nil, err
	}
	var addrs4 []net.Addr
	for _, addr := range addrs {
		// Addrs only promises net.Addr values; skip anything that is not a
		// *net.IPNet rather than panicking on an unchecked type assertion.
		ipNet, ok := addr.(*net.IPNet)
		if !ok {
			continue
		}
		// To4 returns nil for anything that is not an IPv4 address.
		if ipNet.IP.To4() != nil {
			addrs4 = append(addrs4, addr)
		}
	}
	switch {
	case len(addrs4) == 0:
		return nil, fmt.Errorf("Interface %v has no IP addresses", name)
	case len(addrs4) > 1:
		fmt.Printf("Interface %v has more than 1 IPv4 address. Defaulting to using %v\n",
			name, (addrs4[0].(*net.IPNet)).IP)
	}
	return addrs4[0], nil
}
2013-02-28 19:50:02 +00:00
// NetworkInterface represents the networking stack of a container.
// Instances are produced by NetworkManager.Allocate and torn down with Release.
type NetworkInterface struct {
// IPNet is the address allocated for the interface, paired with the bridge network's mask.
IPNet net . IPNet
// Gateway is the IP of the bridge network the interface was allocated from.
Gateway net . IP
// manager is the NetworkManager that allocated this interface.
manager * NetworkManager
2013-06-11 22:46:23 +00:00
// extPorts records the port mappings created by AllocatePort so Release can undo them.
extPorts [ ] * Nat
2013-07-22 00:49:09 +00:00
// disabled is set for interfaces handed out by a disabled manager:
// AllocatePort returns an error and Release does nothing.
disabled bool
2013-02-28 19:50:02 +00:00
}
2013-10-05 02:25:15 +00:00
// Allocate an external port and map it to the interface
func ( iface * NetworkInterface ) AllocatePort ( port Port , binding PortBinding ) ( * Nat , error ) {
2013-07-22 00:49:09 +00:00
if iface . disabled {
return nil , fmt . Errorf ( "Trying to allocate port for interface %v, which is disabled" , iface ) // FIXME
}
2014-01-28 23:42:46 +00:00
ip := iface . manager . defaultBindingIP
2013-10-05 02:25:15 +00:00
if binding . HostIp != "" {
ip = net . ParseIP ( binding . HostIp )
} else {
binding . HostIp = ip . String ( )
}
nat := & Nat {
Port : port ,
Binding : binding ,
}
containerPort , err := parsePort ( port . Port ( ) )
2013-02-28 19:50:02 +00:00
if err != nil {
2013-04-05 05:58:01 +00:00
return nil , err
}
2013-06-11 22:46:23 +00:00
2013-10-05 02:25:15 +00:00
hostPort , _ := parsePort ( nat . Binding . HostPort )
2014-01-23 20:17:28 +00:00
extPort , err := portallocator . RequestPort ( ip , nat . Port . Proto ( ) , hostPort )
if err != nil {
return nil , err
}
2013-10-05 02:25:15 +00:00
2014-01-23 20:17:28 +00:00
var backend net . Addr
if nat . Port . Proto ( ) == "tcp" {
backend = & net . TCPAddr { IP : iface . IPNet . IP , Port : containerPort }
2013-06-11 22:46:23 +00:00
} else {
2014-01-23 20:17:28 +00:00
backend = & net . UDPAddr { IP : iface . IPNet . IP , Port : containerPort }
2013-04-05 05:58:01 +00:00
}
2014-01-23 20:17:28 +00:00
2014-01-28 23:42:46 +00:00
if err := portmapper . Map ( backend , ip , extPort ) ; err != nil {
2014-01-23 20:17:28 +00:00
portallocator . ReleasePort ( ip , nat . Port . Proto ( ) , extPort )
return nil , err
}
nat . Binding . HostPort = strconv . Itoa ( extPort )
2013-06-11 22:46:23 +00:00
iface . extPorts = append ( iface . extPorts , nat )
2013-04-05 05:58:01 +00:00
return nat , nil
}
// Nat describes one port mapping created by NetworkInterface.AllocatePort:
// a container port together with the host binding it is exposed on.
type Nat struct {
2013-10-05 02:25:15 +00:00
// Port is the container-side port; AllocatePort reads its number and protocol.
Port Port
// Binding is the host-side IP and port. AllocatePort overwrites HostPort with
// the port that was actually allocated.
Binding PortBinding
2013-04-05 05:58:01 +00:00
}
2013-10-05 02:25:15 +00:00
func ( n * Nat ) String ( ) string {
2013-11-30 00:53:20 +00:00
return fmt . Sprintf ( "%s:%s:%s/%s" , n . Binding . HostIp , n . Binding . HostPort , n . Port . Port ( ) , n . Port . Proto ( ) )
2013-02-28 19:50:02 +00:00
}
// Release: Network cleanup - release all resources
2013-03-30 22:32:10 +00:00
func ( iface * NetworkInterface ) Release ( ) {
2013-07-22 00:49:09 +00:00
if iface . disabled {
return
}
2013-06-11 22:46:23 +00:00
for _ , nat := range iface . extPorts {
2013-10-05 02:25:15 +00:00
hostPort , err := parsePort ( nat . Binding . HostPort )
if err != nil {
log . Printf ( "Unable to get host port: %s" , err )
continue
}
ip := net . ParseIP ( nat . Binding . HostIp )
2013-11-21 16:26:07 +00:00
utils . Debugf ( "Unmaping %s/%s:%s" , nat . Port . Proto , ip . String ( ) , nat . Binding . HostPort )
2014-01-28 23:42:46 +00:00
var host net . Addr
if nat . Port . Proto ( ) == "tcp" {
host = & net . TCPAddr { IP : ip , Port : hostPort }
} else {
host = & net . UDPAddr { IP : ip , Port : hostPort }
}
if err := portmapper . Unmap ( host ) ; err != nil {
2013-10-05 02:25:15 +00:00
log . Printf ( "Unable to unmap port %s: %s" , nat , err )
2013-02-28 19:50:02 +00:00
}
2013-11-21 16:26:07 +00:00
2014-01-23 20:17:28 +00:00
if err := portallocator . ReleasePort ( ip , nat . Port . Proto ( ) , hostPort ) ; err != nil {
log . Printf ( "Unable to release port %s" , nat )
2013-02-28 19:50:02 +00:00
}
}
2013-03-30 22:32:10 +00:00
2014-01-23 09:31:38 +00:00
if err := ipallocator . ReleaseIP ( iface . manager . bridgeNetwork , & iface . IPNet . IP ) ; err != nil {
log . Printf ( "Unable to release ip %s\n" , err )
}
2013-02-28 19:50:02 +00:00
}
// NetworkManager manages a set of network interfaces.
// Only *one* manager per host machine should be used.
type NetworkManager struct {
2014-01-28 23:42:46 +00:00
// bridgeIface is the bridge interface name, copied from config.BridgeIface.
bridgeIface string
// bridgeNetwork is the bridge's address and mask as read from the interface;
// Allocate requests container IPs from it and uses its IP as the gateway.
bridgeNetwork * net . IPNet
// defaultBindingIP is copied from config.DefaultIp; AllocatePort uses it when
// a binding does not name a host IP.
defaultBindingIP net . IP
// disabled is set when the bridge is configured as DisableNetworkBridge;
// Allocate then returns disabled interfaces without touching any allocator.
disabled bool
2013-02-28 19:50:02 +00:00
}
// Allocate a network interface
func ( manager * NetworkManager ) Allocate ( ) ( * NetworkInterface , error ) {
2013-07-22 00:49:09 +00:00
if manager . disabled {
return & NetworkInterface { disabled : true } , nil
}
2014-01-23 09:31:38 +00:00
var ip * net . IP
2013-08-21 14:37:58 +00:00
var err error
2014-01-23 09:31:38 +00:00
ip , err = ipallocator . RequestIP ( manager . bridgeNetwork , nil )
2013-02-25 18:45:23 +00:00
if err != nil {
2013-02-25 22:06:22 +00:00
return nil , err
2013-02-25 18:45:23 +00:00
}
2013-08-21 14:37:58 +00:00
2013-02-25 22:06:22 +00:00
iface := & NetworkInterface {
2014-01-23 09:31:38 +00:00
IPNet : net . IPNet { IP : * ip , Mask : manager . bridgeNetwork . Mask } ,
2013-02-28 19:50:02 +00:00
Gateway : manager . bridgeNetwork . IP ,
manager : manager ,
2013-02-25 22:06:22 +00:00
}
return iface , nil
}
2013-10-05 02:25:15 +00:00
func newNetworkManager ( config * DaemonConfig ) ( * NetworkManager , error ) {
if config . BridgeIface == DisableNetworkBridge {
2013-07-22 00:49:09 +00:00
manager := & NetworkManager {
disabled : true ,
}
return manager , nil
}
2014-01-23 13:22:32 +00:00
var network * net . IPNet
2013-10-05 02:25:15 +00:00
addr , err := getIfaceAddr ( config . BridgeIface )
2013-02-28 19:50:02 +00:00
if err != nil {
2013-04-03 22:57:57 +00:00
// If the iface is not found, try to create it
2013-10-05 02:25:15 +00:00
if err := CreateBridgeIface ( config ) ; err != nil {
2013-04-03 22:57:57 +00:00
return nil , err
}
2013-10-05 02:25:15 +00:00
addr , err = getIfaceAddr ( config . BridgeIface )
2013-04-03 22:57:57 +00:00
if err != nil {
return nil , err
}
2014-01-23 13:22:32 +00:00
network = addr . ( * net . IPNet )
} else {
network = addr . ( * net . IPNet )
2013-02-28 19:50:02 +00:00
}
2013-10-24 16:08:50 +00:00
// Configure iptables for link support
if config . EnableIptables {
2013-11-27 08:10:44 +00:00
// Enable NAT
natArgs := [ ] string { "POSTROUTING" , "-t" , "nat" , "-s" , addr . String ( ) , "!" , "-d" , addr . String ( ) , "-j" , "MASQUERADE" }
if ! iptables . Exists ( natArgs ... ) {
2014-01-30 17:03:25 +00:00
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , natArgs ... ) ... ) ; err != nil {
2013-11-27 08:10:44 +00:00
return nil , fmt . Errorf ( "Unable to enable network bridge NAT: %s" , err )
} else if len ( output ) != 0 {
return nil , fmt . Errorf ( "Error iptables postrouting: %s" , output )
}
}
2013-11-01 23:29:25 +00:00
args := [ ] string { "FORWARD" , "-i" , config . BridgeIface , "-o" , config . BridgeIface , "-j" }
acceptArgs := append ( args , "ACCEPT" )
dropArgs := append ( args , "DROP" )
2013-10-24 16:08:50 +00:00
if ! config . InterContainerCommunication {
2013-11-01 23:29:25 +00:00
iptables . Raw ( append ( [ ] string { "-D" } , acceptArgs ... ) ... )
if ! iptables . Exists ( dropArgs ... ) {
2013-10-24 16:08:50 +00:00
utils . Debugf ( "Disable inter-container communication" )
2013-11-01 23:29:25 +00:00
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , dropArgs ... ) ... ) ; err != nil {
2013-10-24 16:08:50 +00:00
return nil , fmt . Errorf ( "Unable to prevent intercontainer communication: %s" , err )
2013-11-05 15:33:07 +00:00
} else if len ( output ) != 0 {
2013-11-01 23:29:25 +00:00
return nil , fmt . Errorf ( "Error disabling intercontainer communication: %s" , output )
2013-10-24 16:08:50 +00:00
}
}
} else {
2013-11-01 23:29:25 +00:00
iptables . Raw ( append ( [ ] string { "-D" } , dropArgs ... ) ... )
if ! iptables . Exists ( acceptArgs ... ) {
utils . Debugf ( "Enable inter-container communication" )
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , acceptArgs ... ) ... ) ; err != nil {
return nil , fmt . Errorf ( "Unable to allow intercontainer communication: %s" , err )
} else if len ( output ) != 0 {
return nil , fmt . Errorf ( "Error enabling intercontainer communication: %s" , output )
}
}
2013-10-24 16:08:50 +00:00
}
2014-01-24 22:57:04 +00:00
// Accept all non-intercontainer outgoing packets
outgoingArgs := [ ] string { "FORWARD" , "-i" , config . BridgeIface , "!" , "-o" , config . BridgeIface , "-j" , "ACCEPT" }
if ! iptables . Exists ( outgoingArgs ... ) {
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , outgoingArgs ... ) ... ) ; err != nil {
return nil , fmt . Errorf ( "Unable to allow outgoing packets: %s" , err )
} else if len ( output ) != 0 {
return nil , fmt . Errorf ( "Error iptables allow outgoing: %s" , output )
}
}
// Accept incoming packets for existing connections
existingArgs := [ ] string { "FORWARD" , "-o" , config . BridgeIface , "-m" , "conntrack" , "--ctstate" , "RELATED,ESTABLISHED" , "-j" , "ACCEPT" }
if ! iptables . Exists ( existingArgs ... ) {
if output , err := iptables . Raw ( append ( [ ] string { "-I" } , existingArgs ... ) ... ) ; err != nil {
return nil , fmt . Errorf ( "Unable to allow incoming packets: %s" , err )
} else if len ( output ) != 0 {
return nil , fmt . Errorf ( "Error iptables allow incoming: %s" , output )
}
}
2013-10-24 16:08:50 +00:00
}
2014-01-28 04:35:05 +00:00
if config . EnableIpForward {
// Enable IPv4 forwarding
if err := ioutil . WriteFile ( "/proc/sys/net/ipv4/ip_forward" , [ ] byte { '1' , '\n' } , 0644 ) ; err != nil {
log . Printf ( "WARNING: unable to enable IPv4 forwarding: %s\n" , err )
}
}
2014-01-28 23:42:46 +00:00
// We can always try removing the iptables
2014-01-29 00:28:32 +00:00
if err := iptables . RemoveExistingChain ( "DOCKER" ) ; err != nil {
2013-03-23 04:43:31 +00:00
return nil , err
}
2013-02-28 19:50:02 +00:00
2014-01-28 23:42:46 +00:00
if config . EnableIptables {
2014-01-29 00:28:32 +00:00
chain , err := iptables . NewChain ( "DOCKER" , config . BridgeIface )
if err != nil {
2014-01-28 23:42:46 +00:00
return nil , err
}
2014-01-29 00:28:32 +00:00
portmapper . SetIptablesChain ( chain )
2013-02-28 19:50:02 +00:00
}
2013-10-08 22:42:02 +00:00
2014-01-28 23:42:46 +00:00
manager := & NetworkManager {
bridgeIface : config . BridgeIface ,
bridgeNetwork : network ,
defaultBindingIP : config . DefaultIp ,
}
2013-02-28 19:50:02 +00:00
return manager , nil
2013-02-25 18:45:23 +00:00
}