package lxc import ( "fmt" "github.com/dotcloud/docker/engine" "github.com/dotcloud/docker/networkdriver" "github.com/dotcloud/docker/networkdriver/ipallocator" "github.com/dotcloud/docker/networkdriver/portallocator" "github.com/dotcloud/docker/networkdriver/portmapper" "github.com/dotcloud/docker/pkg/iptables" "github.com/dotcloud/docker/pkg/netlink" "github.com/dotcloud/docker/utils" "io/ioutil" "log" "net" "strings" "syscall" "unsafe" ) const ( DefaultNetworkBridge = "docker0" siocBRADDBR = 0x89a0 ) // Network interface represents the networking stack of a container type networkInterface struct { IP net.IP PortMappings []net.Addr // there are mappings to the host interfaces } var ( addrs = []string{ // Here we don't follow the convention of using the 1st IP of the range for the gateway. // This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges. // In theory this shouldn't matter - in practice there's bound to be a few scripts relying // on the internal addressing or other stupid things like that. // The shouldn't, but hey, let's not break them unless we really have to. "172.17.42.1/16", // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23 "10.0.42.1/16", // Don't even try using the entire /8, that's too intrusive "10.1.42.1/16", "10.42.42.1/16", "172.16.42.1/24", "172.16.43.1/24", "172.16.44.1/24", "10.0.42.1/24", "10.0.43.1/24", "192.168.42.1/24", "192.168.43.1/24", "192.168.44.1/24", } bridgeIface string bridgeNetwork *net.IPNet defaultBindingIP = net.ParseIP("0.0.0.0") currentInterfaces = make(map[string]*networkInterface) ) func init() { if err := engine.Register("init_networkdriver", InitDriver); err != nil { panic(err) } } func InitDriver(job *engine.Job) engine.Status { var ( network *net.IPNet enableIPTables = job.GetenvBool("EnableIptables") icc = job.GetenvBool("InterContainerCommunication") ipForward = job.GetenvBool("EnableIpForward") bridgeIP = job.Getenv("BridgeIP") ) if defaultIP := job.Getenv("DefaultBindingIP"); defaultIP != "" { defaultBindingIP = net.ParseIP(defaultIP) } bridgeIface = job.Getenv("BridgeIface") if bridgeIface == "" { bridgeIface = DefaultNetworkBridge } addr, err := networkdriver.GetIfaceAddr(bridgeIface) if err != nil { // If the iface is not found, try to create it job.Logf("creating new bridge for %s", bridgeIface) if err := createBridge(bridgeIP); err != nil { job.Error(err) return engine.StatusErr } job.Logf("getting iface addr") addr, err = networkdriver.GetIfaceAddr(bridgeIface) if err != nil { job.Error(err) return engine.StatusErr } network = addr.(*net.IPNet) } else { network = addr.(*net.IPNet) } // Configure iptables for link support if enableIPTables { if err := setupIPTables(addr, icc); err != nil { job.Error(err) return engine.StatusErr } } if ipForward { // Enable IPv4 forwarding if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte{'1', '\n'}, 0644); err != nil { job.Logf("WARNING: unable to enable IPv4 forwarding: %s\n", err) } } // We can always try removing the iptables if err := iptables.RemoveExistingChain("DOCKER"); err != nil { job.Error(err) return engine.StatusErr } if enableIPTables { chain, err := iptables.NewChain("DOCKER", bridgeIface) if err != nil { job.Error(err) return engine.StatusErr } portmapper.SetIptablesChain(chain) } bridgeNetwork = network // https://github.com/dotcloud/docker/issues/2768 job.Eng.Hack_SetGlobalVar("httpapi.bridgeIP", bridgeNetwork.IP) for name, f := range map[string]engine.Handler{ "allocate_interface": Allocate, "release_interface": Release, "allocate_port": AllocatePort, "link": LinkContainers, } { if err := job.Eng.Register(name, f); err != nil { job.Error(err) return engine.StatusErr } } return engine.StatusOK } func setupIPTables(addr net.Addr, icc bool) error { // Enable NAT natArgs := []string{"POSTROUTING", "-t", "nat", "-s", addr.String(), "!", "-d", addr.String(), "-j", "MASQUERADE"} if !iptables.Exists(natArgs...) { if output, err := iptables.Raw(append([]string{"-I"}, natArgs...)...); err != nil { return fmt.Errorf("Unable to enable network bridge NAT: %s", err) } else if len(output) != 0 { return fmt.Errorf("Error iptables postrouting: %s", output) } } var ( args = []string{"FORWARD", "-i", bridgeIface, "-o", bridgeIface, "-j"} acceptArgs = append(args, "ACCEPT") dropArgs = append(args, "DROP") ) if !icc { iptables.Raw(append([]string{"-D"}, acceptArgs...)...) if !iptables.Exists(dropArgs...) { utils.Debugf("Disable inter-container communication") if output, err := iptables.Raw(append([]string{"-I"}, dropArgs...)...); err != nil { return fmt.Errorf("Unable to prevent intercontainer communication: %s", err) } else if len(output) != 0 { return fmt.Errorf("Error disabling intercontainer communication: %s", output) } } } else { iptables.Raw(append([]string{"-D"}, dropArgs...)...) if !iptables.Exists(acceptArgs...) { utils.Debugf("Enable inter-container communication") if output, err := iptables.Raw(append([]string{"-I"}, acceptArgs...)...); err != nil { return fmt.Errorf("Unable to allow intercontainer communication: %s", err) } else if len(output) != 0 { return fmt.Errorf("Error enabling intercontainer communication: %s", output) } } } // Accept all non-intercontainer outgoing packets outgoingArgs := []string{"FORWARD", "-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"} if !iptables.Exists(outgoingArgs...) { if output, err := iptables.Raw(append([]string{"-I"}, outgoingArgs...)...); err != nil { return fmt.Errorf("Unable to allow outgoing packets: %s", err) } else if len(output) != 0 { return fmt.Errorf("Error iptables allow outgoing: %s", output) } } // Accept incoming packets for existing connections existingArgs := []string{"FORWARD", "-o", bridgeIface, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"} if !iptables.Exists(existingArgs...) { if output, err := iptables.Raw(append([]string{"-I"}, existingArgs...)...); err != nil { return fmt.Errorf("Unable to allow incoming packets: %s", err) } else if len(output) != 0 { return fmt.Errorf("Error iptables allow incoming: %s", output) } } return nil } // CreateBridgeIface creates a network bridge interface on the host system with the name `ifaceName`, // and attempts to configure it with an address which doesn't conflict with any other interface on the host. // If it can't find an address which doesn't conflict, it will return an error. func createBridge(bridgeIP string) error { nameservers := []string{} resolvConf, _ := utils.GetResolvConf() // we don't check for an error here, because we don't really care // if we can't read /etc/resolv.conf. So instead we skip the append // if resolvConf is nil. It either doesn't exist, or we can't read it // for some reason. if resolvConf != nil { nameservers = append(nameservers, utils.GetNameserversAsCIDR(resolvConf)...) } var ifaceAddr string if len(bridgeIP) != 0 { _, _, err := net.ParseCIDR(bridgeIP) if err != nil { return err } ifaceAddr = bridgeIP } else { for _, addr := range addrs { _, dockerNetwork, err := net.ParseCIDR(addr) if err != nil { return err } if err := networkdriver.CheckNameserverOverlaps(nameservers, dockerNetwork); err == nil { if err := networkdriver.CheckRouteOverlaps(dockerNetwork); err == nil { ifaceAddr = addr break } else { utils.Debugf("%s %s", addr, err) } } } } if ifaceAddr == "" { return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", bridgeIface, bridgeIface) } utils.Debugf("Creating bridge %s with network %s", bridgeIface, ifaceAddr) if err := createBridgeIface(bridgeIface); err != nil { return err } iface, err := net.InterfaceByName(bridgeIface) if err != nil { return err } ipAddr, ipNet, err := net.ParseCIDR(ifaceAddr) if err != nil { return err } if netlink.NetworkLinkAddIp(iface, ipAddr, ipNet); err != nil { return fmt.Errorf("Unable to add private network: %s", err) } if err := netlink.NetworkLinkUp(iface); err != nil { return fmt.Errorf("Unable to start network bridge: %s", err) } return nil } // Create the actual bridge device. This is more backward-compatible than // netlink.NetworkLinkAdd and works on RHEL 6. func createBridgeIface(name string) error { s, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_STREAM, syscall.IPPROTO_IP) if err != nil { utils.Debugf("Bridge socket creation failed IPv6 probably not enabled: %v", err) s, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_IP) if err != nil { return fmt.Errorf("Error creating bridge creation socket: %s", err) } } defer syscall.Close(s) nameBytePtr, err := syscall.BytePtrFromString(name) if err != nil { return fmt.Errorf("Error converting bridge name %s to byte array: %s", name, err) } if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), siocBRADDBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 { return fmt.Errorf("Error creating bridge: %s", err) } return nil } // Allocate a network interface func Allocate(job *engine.Job) engine.Status { var ( ip *net.IP err error id = job.Args[0] requestedIP = net.ParseIP(job.Getenv("RequestedIP")) ) if requestedIP != nil { ip, err = ipallocator.RequestIP(bridgeNetwork, &requestedIP) } else { ip, err = ipallocator.RequestIP(bridgeNetwork, nil) } if err != nil { job.Error(err) return engine.StatusErr } out := engine.Env{} out.Set("IP", ip.String()) out.Set("Mask", bridgeNetwork.Mask.String()) out.Set("Gateway", bridgeNetwork.IP.String()) out.Set("Bridge", bridgeIface) size, _ := bridgeNetwork.Mask.Size() out.SetInt("IPPrefixLen", size) currentInterfaces[id] = &networkInterface{ IP: *ip, } out.WriteTo(job.Stdout) return engine.StatusOK } // release an interface for a select ip func Release(job *engine.Job) engine.Status { var ( id = job.Args[0] containerInterface = currentInterfaces[id] ip net.IP port int proto string ) if containerInterface == nil { return job.Errorf("No network information to release for %s", id) } for _, nat := range containerInterface.PortMappings { if err := portmapper.Unmap(nat); err != nil { log.Printf("Unable to unmap port %s: %s", nat, err) } // this is host mappings switch a := nat.(type) { case *net.TCPAddr: proto = "tcp" ip = a.IP port = a.Port case *net.UDPAddr: proto = "udp" ip = a.IP port = a.Port } if err := portallocator.ReleasePort(ip, proto, port); err != nil { log.Printf("Unable to release port %s", nat) } } if err := ipallocator.ReleaseIP(bridgeNetwork, &containerInterface.IP); err != nil { log.Printf("Unable to release ip %s\n", err) } return engine.StatusOK } // Allocate an external port and map it to the interface func AllocatePort(job *engine.Job) engine.Status { var ( err error ip = defaultBindingIP id = job.Args[0] hostIP = job.Getenv("HostIP") hostPort = job.GetenvInt("HostPort") containerPort = job.GetenvInt("ContainerPort") proto = job.Getenv("Proto") network = currentInterfaces[id] ) if hostIP != "" { ip = net.ParseIP(hostIP) } // host ip, proto, and host port hostPort, err = portallocator.RequestPort(ip, proto, hostPort) if err != nil { job.Error(err) return engine.StatusErr } var ( container net.Addr host net.Addr ) if proto == "tcp" { host = &net.TCPAddr{IP: ip, Port: hostPort} container = &net.TCPAddr{IP: network.IP, Port: containerPort} } else { host = &net.UDPAddr{IP: ip, Port: hostPort} container = &net.UDPAddr{IP: network.IP, Port: containerPort} } if err := portmapper.Map(container, ip, hostPort); err != nil { portallocator.ReleasePort(ip, proto, hostPort) job.Error(err) return engine.StatusErr } network.PortMappings = append(network.PortMappings, host) out := engine.Env{} out.Set("HostIP", ip.String()) out.SetInt("HostPort", hostPort) if _, err := out.WriteTo(job.Stdout); err != nil { job.Error(err) return engine.StatusErr } return engine.StatusOK } func LinkContainers(job *engine.Job) engine.Status { var ( action = job.Args[0] childIP = job.Getenv("ChildIP") parentIP = job.Getenv("ParentIP") ignoreErrors = job.GetenvBool("IgnoreErrors") ports = job.GetenvList("Ports") ) split := func(p string) (string, string) { parts := strings.Split(p, "/") return parts[0], parts[1] } for _, p := range ports { port, proto := split(p) if output, err := iptables.Raw(action, "FORWARD", "-i", bridgeIface, "-o", bridgeIface, "-p", proto, "-s", parentIP, "--dport", port, "-d", childIP, "-j", "ACCEPT"); !ignoreErrors && err != nil { job.Error(err) return engine.StatusErr } else if len(output) != 0 { job.Errorf("Error toggle iptables forward: %s", output) return engine.StatusErr } if output, err := iptables.Raw(action, "FORWARD", "-i", bridgeIface, "-o", bridgeIface, "-p", proto, "-s", childIP, "--sport", port, "-d", parentIP, "-j", "ACCEPT"); !ignoreErrors && err != nil { job.Error(err) return engine.StatusErr } else if len(output) != 0 { job.Errorf("Error toggle iptables forward: %s", output) return engine.StatusErr } } return engine.StatusOK }