2015-07-02 01:00:48 -04:00
|
|
|
package osl
|
2015-02-19 20:21:42 -05:00
|
|
|
|
2015-03-11 19:37:43 -04:00
|
|
|
import (
|
2021-05-27 20:15:56 -04:00
|
|
|
"errors"
|
2015-03-11 19:37:43 -04:00
|
|
|
"fmt"
|
2015-04-13 21:36:58 -04:00
|
|
|
"net"
|
2015-03-11 19:37:43 -04:00
|
|
|
"os"
|
2015-05-26 17:14:01 -04:00
|
|
|
"os/exec"
|
2016-07-15 00:25:52 -04:00
|
|
|
"path/filepath"
|
2015-03-11 19:37:43 -04:00
|
|
|
"runtime"
|
2016-06-10 20:32:19 -04:00
|
|
|
"strconv"
|
|
|
|
"strings"
|
2015-04-28 01:57:36 -04:00
|
|
|
"sync"
|
2015-03-11 19:37:43 -04:00
|
|
|
"syscall"
|
2015-05-26 17:14:01 -04:00
|
|
|
"time"
|
2015-03-11 19:37:43 -04:00
|
|
|
|
2021-04-05 20:24:47 -04:00
|
|
|
"github.com/docker/docker/libnetwork/ns"
|
|
|
|
"github.com/docker/docker/libnetwork/osl/kernel"
|
|
|
|
"github.com/docker/docker/libnetwork/types"
|
2021-05-27 20:15:56 -04:00
|
|
|
"github.com/docker/docker/pkg/reexec"
|
2017-07-26 17:18:31 -04:00
|
|
|
"github.com/sirupsen/logrus"
|
2015-03-11 19:37:43 -04:00
|
|
|
"github.com/vishvananda/netlink"
|
|
|
|
"github.com/vishvananda/netns"
|
2021-05-27 20:15:56 -04:00
|
|
|
"golang.org/x/sys/unix"
|
2015-03-11 19:37:43 -04:00
|
|
|
)
|
2015-02-24 14:19:00 -05:00
|
|
|
|
2016-07-15 00:25:52 -04:00
|
|
|
// defaultPrefix is the initial value of prefix, the base directory under
// which the "netns" directory is placed; SetBasePath can replace it.
const defaultPrefix = "/var/run/docker"
|
2015-04-28 01:57:36 -04:00
|
|
|
|
2016-02-23 16:28:14 -05:00
|
|
|
func init() {
|
|
|
|
reexec.Register("set-ipv6", reexecSetIPv6)
|
|
|
|
}
|
|
|
|
|
2015-05-26 17:14:01 -04:00
|
|
|
var (
|
2022-01-11 23:13:39 -05:00
|
|
|
once sync.Once
|
|
|
|
garbagePathMap = make(map[string]bool)
|
|
|
|
gpmLock sync.Mutex
|
|
|
|
gpmWg sync.WaitGroup
|
|
|
|
gpmCleanupPeriod = 60 * time.Second
|
|
|
|
gpmChan = make(chan chan struct{})
|
|
|
|
prefix = defaultPrefix
|
2015-05-26 17:14:01 -04:00
|
|
|
)
|
2015-04-28 01:57:36 -04:00
|
|
|
|
2015-04-13 14:40:42 -04:00
|
|
|
// The networkNamespace type is the linux implementation of the Sandbox
|
|
|
|
// interface. It represents a linux network namespace, and moves an interface
|
|
|
|
// into it when called on method AddInterface or sets the gateway etc.
|
2015-02-19 20:21:42 -05:00
|
|
|
type networkNamespace struct {
|
2015-06-04 23:21:23 -04:00
|
|
|
path string
|
|
|
|
iFaces []*nwIface
|
|
|
|
gw net.IP
|
|
|
|
gwv6 net.IP
|
|
|
|
staticRoutes []*types.StaticRoute
|
2015-06-15 14:35:13 -04:00
|
|
|
neighbors []*neigh
|
2017-02-27 17:23:12 -05:00
|
|
|
nextIfIndex map[string]int
|
2015-12-10 17:35:49 -05:00
|
|
|
isDefault bool
|
2016-05-16 14:51:40 -04:00
|
|
|
nlHandle *netlink.Handle
|
2016-02-23 16:28:14 -05:00
|
|
|
loV6Enabled bool
|
2015-05-21 14:04:49 -04:00
|
|
|
sync.Mutex
|
2015-02-19 20:21:42 -05:00
|
|
|
}
|
|
|
|
|
2016-07-15 00:25:52 -04:00
|
|
|
// SetBasePath sets the base url prefix for the ns path
|
|
|
|
func SetBasePath(path string) {
|
|
|
|
prefix = path
|
|
|
|
}
|
|
|
|
|
2015-05-26 17:14:01 -04:00
|
|
|
func init() {
|
|
|
|
reexec.Register("netns-create", reexecCreateNamespace)
|
|
|
|
}
|
|
|
|
|
2016-07-15 00:25:52 -04:00
|
|
|
func basePath() string {
|
|
|
|
return filepath.Join(prefix, "netns")
|
|
|
|
}
|
|
|
|
|
2015-04-30 01:58:12 -04:00
|
|
|
func createBasePath() {
|
2016-07-15 00:25:52 -04:00
|
|
|
err := os.MkdirAll(basePath(), 0755)
|
Simplify and fix os.MkdirAll() usage
TL;DR: check for IsExist(err) after a failed MkdirAll() is both
redundant and wrong -- so two reasons to remove it.
Quoting MkdirAll documentation:
> MkdirAll creates a directory named path, along with any necessary
> parents, and returns nil, or else returns an error. If path
> is already a directory, MkdirAll does nothing and returns nil.
This means two things:
1. If a directory to be created already exists, no error is
returned.
2. If the error returned is IsExist (EEXIST), it means there exists
a non-directory with the same name as MkdirAll need to use for
directory. Example: we want to MkdirAll("a/b"), but file "a"
(or "a/b") already exists, so MkdirAll fails.
The above is a theory, based on quoted documentation and my UNIX
knowledge.
3. In practice, though, current MkdirAll implementation [1] returns
ENOTDIR in most of cases described in #2, with the exception when
there is a race between MkdirAll and someone else creating the
last component of MkdirAll argument as a file. In this very case
MkdirAll() will indeed return EEXIST.
Because of #1, IsExist check after MkdirAll is not needed.
Because of #2 and #3, ignoring IsExist error is just plain wrong,
as directory we require is not created. It's cleaner to report
the error now.
[1] https://github.com/golang/go/blob/f9ed2f75/src/os/path.go
Signed-off-by: Kir Kolyshkin <kir@openvz.org>
2015-07-29 21:08:29 -04:00
|
|
|
if err != nil {
|
2015-04-28 01:57:36 -04:00
|
|
|
panic("Could not create net namespace path directory")
|
|
|
|
}
|
2015-05-26 17:14:01 -04:00
|
|
|
|
|
|
|
// Start the garbage collection go routine
|
|
|
|
go removeUnusedPaths()
|
|
|
|
}
|
|
|
|
|
|
|
|
func removeUnusedPaths() {
|
2015-05-28 19:29:21 -04:00
|
|
|
gpmLock.Lock()
|
|
|
|
period := gpmCleanupPeriod
|
|
|
|
gpmLock.Unlock()
|
|
|
|
|
2015-06-05 14:46:33 -04:00
|
|
|
ticker := time.NewTicker(period)
|
|
|
|
for {
|
|
|
|
var (
|
|
|
|
gc chan struct{}
|
|
|
|
gcOk bool
|
|
|
|
)
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-ticker.C:
|
|
|
|
case gc, gcOk = <-gpmChan:
|
|
|
|
}
|
|
|
|
|
2015-05-26 17:14:01 -04:00
|
|
|
gpmLock.Lock()
|
|
|
|
pathList := make([]string, 0, len(garbagePathMap))
|
|
|
|
for path := range garbagePathMap {
|
|
|
|
pathList = append(pathList, path)
|
|
|
|
}
|
|
|
|
garbagePathMap = make(map[string]bool)
|
|
|
|
gpmWg.Add(1)
|
|
|
|
gpmLock.Unlock()
|
|
|
|
|
|
|
|
for _, path := range pathList {
|
|
|
|
os.Remove(path)
|
|
|
|
}
|
|
|
|
|
|
|
|
gpmWg.Done()
|
2015-06-05 14:46:33 -04:00
|
|
|
if gcOk {
|
|
|
|
close(gc)
|
|
|
|
}
|
2015-05-26 17:14:01 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func addToGarbagePaths(path string) {
|
|
|
|
gpmLock.Lock()
|
|
|
|
garbagePathMap[path] = true
|
2015-05-27 17:40:02 -04:00
|
|
|
gpmLock.Unlock()
|
2015-05-26 17:14:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
func removeFromGarbagePaths(path string) {
|
|
|
|
gpmLock.Lock()
|
|
|
|
delete(garbagePathMap, path)
|
2015-05-27 17:40:02 -04:00
|
|
|
gpmLock.Unlock()
|
2015-04-28 01:57:36 -04:00
|
|
|
}
|
|
|
|
|
2015-06-05 14:46:33 -04:00
|
|
|
// GC triggers garbage collection of namespace path right away
|
|
|
|
// and waits for it.
|
|
|
|
func GC() {
|
2015-06-10 14:55:14 -04:00
|
|
|
gpmLock.Lock()
|
|
|
|
if len(garbagePathMap) == 0 {
|
|
|
|
// No need for GC if map is empty
|
|
|
|
gpmLock.Unlock()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
gpmLock.Unlock()
|
2015-06-05 14:46:33 -04:00
|
|
|
|
2015-06-10 14:55:14 -04:00
|
|
|
// if content exists in the garbage paths
|
|
|
|
// we can trigger GC to run, providing a
|
|
|
|
// channel to be notified on completion
|
|
|
|
waitGC := make(chan struct{})
|
2015-06-05 14:46:33 -04:00
|
|
|
gpmChan <- waitGC
|
2015-06-10 14:55:14 -04:00
|
|
|
// wait for GC completion
|
2015-06-05 14:46:33 -04:00
|
|
|
<-waitGC
|
|
|
|
}
|
|
|
|
|
2015-04-28 01:57:36 -04:00
|
|
|
// GenerateKey generates a sandbox key based on the passed
|
|
|
|
// container id.
|
|
|
|
func GenerateKey(containerID string) string {
|
|
|
|
maxLen := 12
|
2016-06-10 20:32:19 -04:00
|
|
|
// Read sandbox key from host for overlay
|
|
|
|
if strings.HasPrefix(containerID, "-") {
|
|
|
|
var (
|
|
|
|
index int
|
|
|
|
indexStr string
|
|
|
|
tmpkey string
|
|
|
|
)
|
2021-08-24 06:10:50 -04:00
|
|
|
dir, err := os.ReadDir(basePath())
|
2016-06-10 20:32:19 -04:00
|
|
|
if err != nil {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, v := range dir {
|
|
|
|
id := v.Name()
|
|
|
|
if strings.HasSuffix(id, containerID[:maxLen-1]) {
|
|
|
|
indexStr = strings.TrimSuffix(id, containerID[:maxLen-1])
|
|
|
|
tmpindex, err := strconv.Atoi(indexStr)
|
|
|
|
if err != nil {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
if tmpindex > index {
|
|
|
|
index = tmpindex
|
|
|
|
tmpkey = id
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
containerID = tmpkey
|
|
|
|
if containerID == "" {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-28 01:57:36 -04:00
|
|
|
if len(containerID) < maxLen {
|
|
|
|
maxLen = len(containerID)
|
|
|
|
}
|
|
|
|
|
2016-07-15 00:25:52 -04:00
|
|
|
return basePath() + "/" + containerID[:maxLen]
|
2015-04-28 01:57:36 -04:00
|
|
|
}
|
|
|
|
|
2015-04-13 14:40:42 -04:00
|
|
|
// NewSandbox provides a new sandbox instance created in an os specific way
|
|
|
|
// provided a key which uniquely identifies the sandbox
|
2016-06-10 20:32:19 -04:00
|
|
|
func NewSandbox(key string, osCreate, isRestore bool) (Sandbox, error) {
|
|
|
|
if !isRestore {
|
|
|
|
err := createNetworkNamespace(key, osCreate)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2016-06-14 01:24:12 -04:00
|
|
|
} else {
|
|
|
|
once.Do(createBasePath)
|
2015-05-03 16:29:43 -04:00
|
|
|
}
|
|
|
|
|
2017-02-27 17:23:12 -05:00
|
|
|
n := &networkNamespace{path: key, isDefault: !osCreate, nextIfIndex: make(map[string]int)}
|
2016-05-16 14:51:40 -04:00
|
|
|
|
|
|
|
sboxNs, err := netns.GetFromPath(n.path)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed get network namespace %q: %v", n.path, err)
|
|
|
|
}
|
|
|
|
defer sboxNs.Close()
|
|
|
|
|
2016-06-15 16:07:52 -04:00
|
|
|
n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
|
2016-05-16 14:51:40 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
|
|
|
|
}
|
|
|
|
|
2016-11-15 14:42:47 -05:00
|
|
|
err = n.nlHandle.SetSocketTimeout(ns.NetlinkSocketsTimeout)
|
|
|
|
if err != nil {
|
|
|
|
logrus.Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
|
|
|
|
}
|
2017-12-27 23:32:29 -05:00
|
|
|
// In live-restore mode, IPV6 entries are getting cleaned up due to below code
|
2018-09-06 23:43:42 -04:00
|
|
|
// We should retain IPV6 configurations in live-restore mode when Docker Daemon
|
2017-12-27 23:32:29 -05:00
|
|
|
// comes back. It should work as it is on other cases
|
2016-02-23 16:28:14 -05:00
|
|
|
// As starting point, disable IPv6 on all interfaces
|
2017-12-27 23:32:29 -05:00
|
|
|
if !isRestore && !n.isDefault {
|
2017-04-07 17:14:26 -04:00
|
|
|
err = setIPv6(n.path, "all", false)
|
|
|
|
if err != nil {
|
|
|
|
logrus.Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
|
|
|
|
}
|
2016-02-23 16:28:14 -05:00
|
|
|
}
|
|
|
|
|
2016-05-16 14:51:40 -04:00
|
|
|
if err = n.loopbackUp(); err != nil {
|
|
|
|
n.nlHandle.Delete()
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return n, nil
|
2015-06-04 23:21:23 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
func (n *networkNamespace) InterfaceOptions() IfaceOptionSetter {
|
|
|
|
return n
|
2015-04-13 14:40:42 -04:00
|
|
|
}
|
|
|
|
|
2015-06-15 14:35:13 -04:00
|
|
|
func (n *networkNamespace) NeighborOptions() NeighborOptionSetter {
|
|
|
|
return n
|
|
|
|
}
|
|
|
|
|
2015-09-09 19:20:54 -04:00
|
|
|
// mountNetworkNamespace bind-mounts the namespace reference at basePath
// onto the file lnPath.
func mountNetworkNamespace(basePath string, lnPath string) error {
	const fsType = "bind"
	return syscall.Mount(basePath, lnPath, fsType, syscall.MS_BIND, "")
}
|
2015-05-27 16:20:24 -04:00
|
|
|
|
2015-09-09 19:20:54 -04:00
|
|
|
// GetSandboxForExternalKey returns sandbox object for the supplied path
|
|
|
|
func GetSandboxForExternalKey(basePath string, key string) (Sandbox, error) {
|
2016-05-16 14:51:40 -04:00
|
|
|
if err := createNamespaceFile(key); err != nil {
|
2015-09-09 19:20:54 -04:00
|
|
|
return nil, err
|
|
|
|
}
|
2016-05-16 14:51:40 -04:00
|
|
|
|
|
|
|
if err := mountNetworkNamespace(basePath, key); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2017-02-27 17:23:12 -05:00
|
|
|
n := &networkNamespace{path: key, nextIfIndex: make(map[string]int)}
|
2016-05-16 14:51:40 -04:00
|
|
|
|
|
|
|
sboxNs, err := netns.GetFromPath(n.path)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed get network namespace %q: %v", n.path, err)
|
|
|
|
}
|
|
|
|
defer sboxNs.Close()
|
|
|
|
|
2016-06-15 16:07:52 -04:00
|
|
|
n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
|
2015-09-09 19:20:54 -04:00
|
|
|
if err != nil {
|
2016-05-16 14:51:40 -04:00
|
|
|
return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
|
|
|
|
}
|
|
|
|
|
2016-11-15 14:42:47 -05:00
|
|
|
err = n.nlHandle.SetSocketTimeout(ns.NetlinkSocketsTimeout)
|
|
|
|
if err != nil {
|
|
|
|
logrus.Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
|
|
|
|
}
|
|
|
|
|
2016-02-23 16:28:14 -05:00
|
|
|
// As starting point, disable IPv6 on all interfaces
|
|
|
|
err = setIPv6(n.path, "all", false)
|
|
|
|
if err != nil {
|
|
|
|
logrus.Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
|
|
|
|
}
|
|
|
|
|
2016-05-16 14:51:40 -04:00
|
|
|
if err = n.loopbackUp(); err != nil {
|
|
|
|
n.nlHandle.Delete()
|
2015-09-09 19:20:54 -04:00
|
|
|
return nil, err
|
|
|
|
}
|
2016-05-16 14:51:40 -04:00
|
|
|
|
|
|
|
return n, nil
|
2015-09-09 19:20:54 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
func reexecCreateNamespace() {
|
|
|
|
if len(os.Args) < 2 {
|
2016-11-01 00:26:14 -04:00
|
|
|
logrus.Fatal("no namespace path provided")
|
2015-09-09 19:20:54 -04:00
|
|
|
}
|
|
|
|
if err := mountNetworkNamespace("/proc/self/ns/net", os.Args[1]); err != nil {
|
2016-11-01 00:26:14 -04:00
|
|
|
logrus.Fatal(err)
|
2015-05-27 16:20:24 -04:00
|
|
|
}
|
2015-05-26 17:14:01 -04:00
|
|
|
}
|
|
|
|
|
2015-06-04 23:21:23 -04:00
|
|
|
func createNetworkNamespace(path string, osCreate bool) error {
|
2015-03-11 19:37:43 -04:00
|
|
|
if err := createNamespaceFile(path); err != nil {
|
2015-06-04 23:21:23 -04:00
|
|
|
return err
|
2015-03-11 19:37:43 -04:00
|
|
|
}
|
|
|
|
|
2015-05-26 17:14:01 -04:00
|
|
|
cmd := &exec.Cmd{
|
|
|
|
Path: reexec.Self(),
|
|
|
|
Args: append([]string{"netns-create"}, path),
|
|
|
|
Stdout: os.Stdout,
|
|
|
|
Stderr: os.Stderr,
|
|
|
|
}
|
2015-05-05 00:20:45 -04:00
|
|
|
if osCreate {
|
2015-05-26 17:14:01 -04:00
|
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
|
|
|
cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNET
|
2015-02-19 20:21:42 -05:00
|
|
|
}
|
2015-05-26 17:14:01 -04:00
|
|
|
if err := cmd.Run(); err != nil {
|
2015-06-04 23:21:23 -04:00
|
|
|
return fmt.Errorf("namespace creation reexec command failed: %v", err)
|
2015-03-11 19:37:43 -04:00
|
|
|
}
|
|
|
|
|
2015-06-04 23:21:23 -04:00
|
|
|
return nil
|
2015-02-19 20:21:42 -05:00
|
|
|
}
|
|
|
|
|
2015-05-26 17:14:01 -04:00
|
|
|
func unmountNamespaceFile(path string) {
|
2015-05-19 18:08:58 -04:00
|
|
|
if _, err := os.Stat(path); err == nil {
|
2021-05-27 20:15:56 -04:00
|
|
|
if err := syscall.Unmount(path, syscall.MNT_DETACH); err != nil && !errors.Is(err, unix.EINVAL) {
|
|
|
|
logrus.WithError(err).Error("Error unmounting namespace file")
|
|
|
|
}
|
2015-05-19 18:08:58 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-11 19:37:43 -04:00
|
|
|
func createNamespaceFile(path string) (err error) {
|
|
|
|
var f *os.File
|
2015-04-28 01:57:36 -04:00
|
|
|
|
2015-04-30 01:58:12 -04:00
|
|
|
once.Do(createBasePath)
|
2015-05-26 17:14:01 -04:00
|
|
|
// Remove it from garbage collection list if present
|
|
|
|
removeFromGarbagePaths(path)
|
|
|
|
|
|
|
|
// If the path is there unmount it first
|
|
|
|
unmountNamespaceFile(path)
|
|
|
|
|
|
|
|
// wait for garbage collection to complete if it is in progress
|
|
|
|
// before trying to create the file.
|
|
|
|
gpmWg.Wait()
|
|
|
|
|
2015-03-11 19:37:43 -04:00
|
|
|
if f, err = os.Create(path); err == nil {
|
|
|
|
f.Close()
|
|
|
|
}
|
2015-05-26 17:14:01 -04:00
|
|
|
|
2015-03-11 19:37:43 -04:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2016-05-16 14:51:40 -04:00
|
|
|
func (n *networkNamespace) loopbackUp() error {
|
|
|
|
iface, err := n.nlHandle.LinkByName("lo")
|
2015-03-11 19:37:43 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-05-16 14:51:40 -04:00
|
|
|
return n.nlHandle.LinkSetUp(iface)
|
2015-03-11 19:37:43 -04:00
|
|
|
}
|
|
|
|
|
2018-04-09 23:58:51 -04:00
|
|
|
func (n *networkNamespace) GetLoopbackIfaceName() string {
|
|
|
|
return "lo"
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *networkNamespace) AddAliasIP(ifName string, ip *net.IPNet) error {
|
|
|
|
iface, err := n.nlHandle.LinkByName(ifName)
|
2017-08-01 18:33:48 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return n.nlHandle.AddrAdd(iface, &netlink.Addr{IPNet: ip})
|
|
|
|
}
|
|
|
|
|
2018-04-09 23:58:51 -04:00
|
|
|
func (n *networkNamespace) RemoveAliasIP(ifName string, ip *net.IPNet) error {
|
|
|
|
iface, err := n.nlHandle.LinkByName(ifName)
|
2017-08-01 18:33:48 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return n.nlHandle.AddrDel(iface, &netlink.Addr{IPNet: ip})
|
|
|
|
}
|
|
|
|
|
2018-09-07 09:48:05 -04:00
|
|
|
func (n *networkNamespace) DisableARPForVIP(srcName string) (Err error) {
|
|
|
|
dstName := ""
|
|
|
|
for _, i := range n.Interfaces() {
|
|
|
|
if i.SrcName() == srcName {
|
|
|
|
dstName = i.DstName()
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if dstName == "" {
|
|
|
|
return fmt.Errorf("failed to find interface %s in sandbox", srcName)
|
|
|
|
}
|
|
|
|
|
|
|
|
err := n.InvokeFunc(func() {
|
|
|
|
path := filepath.Join("/proc/sys/net/ipv4/conf", dstName, "arp_ignore")
|
2021-08-24 06:10:50 -04:00
|
|
|
if err := os.WriteFile(path, []byte{'1', '\n'}, 0644); err != nil {
|
2018-09-07 09:48:05 -04:00
|
|
|
Err = fmt.Errorf("Failed to set %s to 1: %v", path, err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
path = filepath.Join("/proc/sys/net/ipv4/conf", dstName, "arp_announce")
|
2021-08-24 06:10:50 -04:00
|
|
|
if err := os.WriteFile(path, []byte{'2', '\n'}, 0644); err != nil {
|
2018-09-07 09:48:05 -04:00
|
|
|
Err = fmt.Errorf("Failed to set %s to 2: %v", path, err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-06-15 14:35:13 -04:00
|
|
|
func (n *networkNamespace) InvokeFunc(f func()) error {
|
|
|
|
return nsInvoke(n.nsPath(), func(nsFD int) error { return nil }, func(callerFD int) error {
|
|
|
|
f()
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2015-08-31 14:55:58 -04:00
|
|
|
// InitOSContext initializes OS context while configuring network resources
|
|
|
|
func InitOSContext() func() {
|
2016-05-16 14:51:40 -04:00
|
|
|
runtime.LockOSThread()
|
2019-08-15 16:39:14 -04:00
|
|
|
if err := ns.SetNamespace(); err != nil {
|
|
|
|
logrus.Error(err)
|
|
|
|
}
|
2015-08-31 14:55:58 -04:00
|
|
|
return runtime.UnlockOSThread
|
|
|
|
}
|
|
|
|
|
2019-08-15 16:39:14 -04:00
|
|
|
func nsInvoke(path string, prefunc func(nsFD int) error, postfunc func(callerFD int) error) error {
|
2015-08-31 14:55:58 -04:00
|
|
|
defer InitOSContext()()
|
2015-05-05 20:32:38 -04:00
|
|
|
|
2016-05-16 14:51:40 -04:00
|
|
|
newNs, err := netns.GetFromPath(path)
|
2015-05-05 20:32:38 -04:00
|
|
|
if err != nil {
|
2015-06-04 23:21:23 -04:00
|
|
|
return fmt.Errorf("failed get network namespace %q: %v", path, err)
|
2015-05-05 20:32:38 -04:00
|
|
|
}
|
2016-05-16 14:51:40 -04:00
|
|
|
defer newNs.Close()
|
2015-05-05 20:32:38 -04:00
|
|
|
|
2015-06-04 23:21:23 -04:00
|
|
|
// Invoked before the namespace switch happens but after the namespace file
|
|
|
|
// handle is obtained.
|
2016-05-16 14:51:40 -04:00
|
|
|
if err := prefunc(int(newNs)); err != nil {
|
2015-06-04 23:21:23 -04:00
|
|
|
return fmt.Errorf("failed in prefunc: %v", err)
|
2015-02-19 20:21:42 -05:00
|
|
|
}
|
2015-03-11 19:37:43 -04:00
|
|
|
|
2016-05-16 14:51:40 -04:00
|
|
|
if err = netns.Set(newNs); err != nil {
|
2015-03-11 19:37:43 -04:00
|
|
|
return err
|
|
|
|
}
|
2019-08-15 16:39:14 -04:00
|
|
|
defer ns.SetNamespace()
|
2015-03-11 19:37:43 -04:00
|
|
|
|
2015-06-04 23:21:23 -04:00
|
|
|
// Invoked after the namespace switch.
|
2015-09-07 13:33:28 -04:00
|
|
|
return postfunc(ns.ParseHandlerInt())
|
2015-02-19 20:21:42 -05:00
|
|
|
}
|
|
|
|
|
2015-06-04 23:21:23 -04:00
|
|
|
func (n *networkNamespace) nsPath() string {
|
2015-06-02 16:20:15 -04:00
|
|
|
n.Lock()
|
2015-06-04 23:21:23 -04:00
|
|
|
defer n.Unlock()
|
2015-05-19 20:08:56 -04:00
|
|
|
|
2015-06-04 23:21:23 -04:00
|
|
|
return n.path
|
2015-05-19 20:08:56 -04:00
|
|
|
}
|
|
|
|
|
2015-06-04 23:21:23 -04:00
|
|
|
func (n *networkNamespace) Info() Info {
|
|
|
|
return n
|
2015-02-19 20:21:42 -05:00
|
|
|
}
|
|
|
|
|
2015-04-13 14:40:42 -04:00
|
|
|
func (n *networkNamespace) Key() string {
|
2015-02-19 20:21:42 -05:00
|
|
|
return n.path
|
|
|
|
}
|
2015-02-24 14:19:00 -05:00
|
|
|
|
|
|
|
func (n *networkNamespace) Destroy() error {
|
2016-05-16 14:51:40 -04:00
|
|
|
if n.nlHandle != nil {
|
|
|
|
n.nlHandle.Delete()
|
|
|
|
}
|
2015-02-24 14:19:00 -05:00
|
|
|
// Assuming no running process is executing in this network namespace,
|
|
|
|
// unmounting is sufficient to destroy it.
|
2015-04-28 01:57:36 -04:00
|
|
|
if err := syscall.Unmount(n.path, syscall.MNT_DETACH); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2015-05-26 17:14:01 -04:00
|
|
|
// Stash it into the garbage collection list
|
|
|
|
addToGarbagePaths(n.path)
|
|
|
|
return nil
|
2015-02-24 14:19:00 -05:00
|
|
|
}
|
2016-06-10 20:32:19 -04:00
|
|
|
|
|
|
|
// Restore restore the network namespace
|
|
|
|
func (n *networkNamespace) Restore(ifsopt map[string][]IfaceOption, routes []*types.StaticRoute, gw net.IP, gw6 net.IP) error {
|
|
|
|
// restore interfaces
|
|
|
|
for name, opts := range ifsopt {
|
|
|
|
if !strings.Contains(name, "+") {
|
|
|
|
return fmt.Errorf("wrong iface name in restore osl sandbox interface: %s", name)
|
|
|
|
}
|
|
|
|
seps := strings.Split(name, "+")
|
|
|
|
srcName := seps[0]
|
|
|
|
dstPrefix := seps[1]
|
|
|
|
i := &nwIface{srcName: srcName, dstName: dstPrefix, ns: n}
|
|
|
|
i.processInterfaceOptions(opts...)
|
|
|
|
if i.master != "" {
|
|
|
|
i.dstMaster = n.findDst(i.master, true)
|
|
|
|
if i.dstMaster == "" {
|
|
|
|
return fmt.Errorf("could not find an appropriate master %q for %q",
|
|
|
|
i.master, i.srcName)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if n.isDefault {
|
|
|
|
i.dstName = i.srcName
|
|
|
|
} else {
|
2016-06-11 19:34:54 -04:00
|
|
|
links, err := n.nlHandle.LinkList()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to retrieve list of links in network namespace %q during restore", n.path)
|
|
|
|
}
|
2016-06-10 20:32:19 -04:00
|
|
|
// due to the docker network connect/disconnect, so the dstName should
|
|
|
|
// restore from the namespace
|
2016-06-11 19:34:54 -04:00
|
|
|
for _, link := range links {
|
|
|
|
addrs, err := n.nlHandle.AddrList(link, netlink.FAMILY_V4)
|
2016-06-10 20:32:19 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-06-11 19:34:54 -04:00
|
|
|
ifaceName := link.Attrs().Name
|
|
|
|
if strings.HasPrefix(ifaceName, "vxlan") {
|
|
|
|
if i.dstName == "vxlan" {
|
|
|
|
i.dstName = ifaceName
|
|
|
|
break
|
2016-06-10 20:32:19 -04:00
|
|
|
}
|
2016-06-11 19:34:54 -04:00
|
|
|
}
|
|
|
|
// find the interface name by ip
|
|
|
|
if i.address != nil {
|
|
|
|
for _, addr := range addrs {
|
|
|
|
if addr.IPNet.String() == i.address.String() {
|
|
|
|
i.dstName = ifaceName
|
2016-06-10 20:32:19 -04:00
|
|
|
break
|
|
|
|
}
|
2016-06-11 19:34:54 -04:00
|
|
|
continue
|
2016-06-10 20:32:19 -04:00
|
|
|
}
|
2016-06-11 19:34:54 -04:00
|
|
|
if i.dstName == ifaceName {
|
|
|
|
break
|
2016-06-10 20:32:19 -04:00
|
|
|
}
|
2016-06-11 19:34:54 -04:00
|
|
|
}
|
|
|
|
// This is to find the interface name of the pair in overlay sandbox
|
|
|
|
if strings.HasPrefix(ifaceName, "veth") {
|
|
|
|
if i.master != "" && i.dstName == "veth" {
|
|
|
|
i.dstName = ifaceName
|
2016-06-10 20:32:19 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-06-11 19:34:54 -04:00
|
|
|
|
2016-06-10 20:32:19 -04:00
|
|
|
var index int
|
|
|
|
indexStr := strings.TrimPrefix(i.dstName, dstPrefix)
|
|
|
|
if indexStr != "" {
|
|
|
|
index, err = strconv.Atoi(indexStr)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
index++
|
|
|
|
n.Lock()
|
2017-02-27 17:23:12 -05:00
|
|
|
if index > n.nextIfIndex[dstPrefix] {
|
|
|
|
n.nextIfIndex[dstPrefix] = index
|
2016-06-10 20:32:19 -04:00
|
|
|
}
|
|
|
|
n.iFaces = append(n.iFaces, i)
|
|
|
|
n.Unlock()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// restore routes
|
|
|
|
for _, r := range routes {
|
|
|
|
n.Lock()
|
|
|
|
n.staticRoutes = append(n.staticRoutes, r)
|
|
|
|
n.Unlock()
|
|
|
|
}
|
|
|
|
|
|
|
|
// restore gateway
|
|
|
|
if len(gw) > 0 {
|
|
|
|
n.Lock()
|
|
|
|
n.gw = gw
|
|
|
|
n.Unlock()
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(gw6) > 0 {
|
|
|
|
n.Lock()
|
|
|
|
n.gwv6 = gw6
|
|
|
|
n.Unlock()
|
|
|
|
}
|
2016-06-11 19:34:54 -04:00
|
|
|
|
2016-06-10 20:32:19 -04:00
|
|
|
return nil
|
|
|
|
}
|
2016-02-23 16:28:14 -05:00
|
|
|
|
|
|
|
// Checks whether IPv6 needs to be enabled/disabled on the loopback interface
|
|
|
|
func (n *networkNamespace) checkLoV6() {
|
|
|
|
var (
|
|
|
|
enable = false
|
|
|
|
action = "disable"
|
|
|
|
)
|
|
|
|
|
|
|
|
n.Lock()
|
|
|
|
for _, iface := range n.iFaces {
|
|
|
|
if iface.AddressIPv6() != nil {
|
|
|
|
enable = true
|
|
|
|
action = "enable"
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
n.Unlock()
|
|
|
|
|
|
|
|
if n.loV6Enabled == enable {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := setIPv6(n.path, "lo", enable); err != nil {
|
|
|
|
logrus.Warnf("Failed to %s IPv6 on loopback interface on network namespace %q: %v", action, n.path, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
n.loV6Enabled = enable
|
|
|
|
}
|
|
|
|
|
|
|
|
func reexecSetIPv6() {
|
|
|
|
runtime.LockOSThread()
|
|
|
|
defer runtime.UnlockOSThread()
|
|
|
|
|
|
|
|
if len(os.Args) < 3 {
|
|
|
|
logrus.Errorf("invalid number of arguments for %s", os.Args[0])
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
|
|
|
|
ns, err := netns.GetFromPath(os.Args[1])
|
|
|
|
if err != nil {
|
|
|
|
logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
|
|
|
|
os.Exit(2)
|
|
|
|
}
|
|
|
|
defer ns.Close()
|
|
|
|
|
|
|
|
if err = netns.Set(ns); err != nil {
|
|
|
|
logrus.Errorf("setting into container netns %q failed: %v", os.Args[1], err)
|
|
|
|
os.Exit(3)
|
|
|
|
}
|
|
|
|
|
|
|
|
var (
|
|
|
|
action = "disable"
|
|
|
|
value = byte('1')
|
|
|
|
path = fmt.Sprintf("/proc/sys/net/ipv6/conf/%s/disable_ipv6", os.Args[2])
|
|
|
|
)
|
|
|
|
|
|
|
|
if os.Args[3] == "true" {
|
|
|
|
action = "enable"
|
|
|
|
value = byte('0')
|
|
|
|
}
|
|
|
|
|
2018-04-02 17:19:24 -04:00
|
|
|
if _, err := os.Stat(path); err != nil {
|
|
|
|
if os.IsNotExist(err) {
|
|
|
|
logrus.Warnf("file does not exist: %s : %v Has IPv6 been disabled in this node's kernel?", path, err)
|
|
|
|
os.Exit(0)
|
|
|
|
}
|
|
|
|
logrus.Errorf("failed to stat %s : %v", path, err)
|
|
|
|
os.Exit(5)
|
|
|
|
}
|
|
|
|
|
2021-08-24 06:10:50 -04:00
|
|
|
if err = os.WriteFile(path, []byte{value, '\n'}, 0644); err != nil {
|
2016-02-23 16:28:14 -05:00
|
|
|
logrus.Errorf("failed to %s IPv6 forwarding for container's interface %s: %v", action, os.Args[2], err)
|
|
|
|
os.Exit(4)
|
|
|
|
}
|
|
|
|
|
|
|
|
os.Exit(0)
|
|
|
|
}
|
|
|
|
|
|
|
|
func setIPv6(path, iface string, enable bool) error {
|
|
|
|
cmd := &exec.Cmd{
|
|
|
|
Path: reexec.Self(),
|
|
|
|
Args: append([]string{"set-ipv6"}, path, iface, strconv.FormatBool(enable)),
|
|
|
|
Stdout: os.Stdout,
|
|
|
|
Stderr: os.Stderr,
|
|
|
|
}
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
|
|
return fmt.Errorf("reexec to set IPv6 failed: %v", err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2018-05-18 17:10:14 -04:00
|
|
|
|
|
|
|
// ApplyOSTweaks applies linux configs on the sandbox
|
|
|
|
func (n *networkNamespace) ApplyOSTweaks(types []SandboxType) {
|
|
|
|
for _, t := range types {
|
|
|
|
switch t {
|
2022-01-11 23:13:39 -05:00
|
|
|
case SandboxTypeLoadBalancer, SandboxTypeIngress:
|
|
|
|
kernel.ApplyOSTweaks(map[string]*kernel.OSValue{
|
|
|
|
// disables any special handling on port reuse of existing IPVS connection table entries
|
|
|
|
// more info: https://github.com/torvalds/linux/blame/v5.15/Documentation/networking/ipvs-sysctl.rst#L32
|
|
|
|
"net.ipv4.vs.conn_reuse_mode": {Value: "0", CheckFn: nil},
|
|
|
|
// expires connection from the IPVS connection table when the backend is not available
|
|
|
|
// more info: https://github.com/torvalds/linux/blame/v5.15/Documentation/networking/ipvs-sysctl.rst#L133
|
|
|
|
"net.ipv4.vs.expire_nodest_conn": {Value: "1", CheckFn: nil},
|
|
|
|
// expires persistent connections to destination servers with weights set to 0
|
|
|
|
// more info: https://github.com/torvalds/linux/blame/v5.15/Documentation/networking/ipvs-sysctl.rst#L151
|
|
|
|
"net.ipv4.vs.expire_quiescent_template": {Value: "1", CheckFn: nil},
|
|
|
|
})
|
2018-05-18 17:10:14 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|