mirror of
				https://github.com/moby/moby.git
				synced 2022-11-09 12:21:53 -05:00 
			
		
		
		
	Vendor libcontainer v0.0.4
Noteworthy changes: - Add Prestart/Poststop hook support - Fix bug finding cgroup mount directory - Add OomScoreAdj as a container configuration option - Ensure the cleanup jobs in the deferrer are executed on error - Don't make modifications to /dev when it is bind mounted Other changes in runc: https://github.com/opencontainers/runc/compare/v0.0.3...v0.0.4 Signed-off-by: David Calavera <david.calavera@gmail.com>
This commit is contained in:
		
							parent
							
								
									9fbef808c1
								
							
						
					
					
						commit
						55a601e3f1
					
				
					 28 changed files with 525 additions and 644 deletions
				
			
		| 
						 | 
				
			
			@ -1112,12 +1112,9 @@ func (container *Container) unmountVolumes(forceSyscall bool) error {
 | 
			
		|||
 | 
			
		||||
func (container *Container) networkMounts() []execdriver.Mount {
 | 
			
		||||
	var mounts []execdriver.Mount
 | 
			
		||||
	mode := "Z"
 | 
			
		||||
	if container.hostConfig.NetworkMode.IsContainer() {
 | 
			
		||||
		mode = "z"
 | 
			
		||||
	}
 | 
			
		||||
	shared := container.hostConfig.NetworkMode.IsContainer()
 | 
			
		||||
	if container.ResolvConfPath != "" {
 | 
			
		||||
		label.Relabel(container.ResolvConfPath, container.MountLabel, mode)
 | 
			
		||||
		label.Relabel(container.ResolvConfPath, container.MountLabel, shared)
 | 
			
		||||
		writable := !container.hostConfig.ReadonlyRootfs
 | 
			
		||||
		if m, exists := container.MountPoints["/etc/resolv.conf"]; exists {
 | 
			
		||||
			writable = m.RW
 | 
			
		||||
| 
						 | 
				
			
			@ -1130,7 +1127,7 @@ func (container *Container) networkMounts() []execdriver.Mount {
 | 
			
		|||
		})
 | 
			
		||||
	}
 | 
			
		||||
	if container.HostnamePath != "" {
 | 
			
		||||
		label.Relabel(container.HostnamePath, container.MountLabel, mode)
 | 
			
		||||
		label.Relabel(container.HostnamePath, container.MountLabel, shared)
 | 
			
		||||
		writable := !container.hostConfig.ReadonlyRootfs
 | 
			
		||||
		if m, exists := container.MountPoints["/etc/hostname"]; exists {
 | 
			
		||||
			writable = m.RW
 | 
			
		||||
| 
						 | 
				
			
			@ -1143,7 +1140,7 @@ func (container *Container) networkMounts() []execdriver.Mount {
 | 
			
		|||
		})
 | 
			
		||||
	}
 | 
			
		||||
	if container.HostsPath != "" {
 | 
			
		||||
		label.Relabel(container.HostsPath, container.MountLabel, mode)
 | 
			
		||||
		label.Relabel(container.HostsPath, container.MountLabel, shared)
 | 
			
		||||
		writable := !container.hostConfig.ReadonlyRootfs
 | 
			
		||||
		if m, exists := container.MountPoints["/etc/hosts"]; exists {
 | 
			
		||||
			writable = m.RW
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -59,7 +59,7 @@ func createContainerPlatformSpecificSettings(container *Container, config *runco
 | 
			
		|||
			return err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if err := label.Relabel(v.Path(), container.MountLabel, "z"); err != nil {
 | 
			
		||||
		if err := label.Relabel(v.Path(), container.MountLabel, true); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -355,7 +355,8 @@ func (daemon *Daemon) registerMountPoints(container *Container, hostConfig *runc
 | 
			
		|||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if err := label.Relabel(bind.Source, container.MountLabel, bind.Mode); err != nil {
 | 
			
		||||
		shared := label.IsShared(bind.Mode)
 | 
			
		||||
		if err := label.Relabel(bind.Source, container.MountLabel, shared); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
		binds[bind.Destination] = true
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -42,7 +42,7 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce
 | 
			
		|||
clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16
 | 
			
		||||
clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
 | 
			
		||||
 | 
			
		||||
clone git github.com/opencontainers/runc v0.0.3 # libcontainer
 | 
			
		||||
clone git github.com/opencontainers/runc v0.0.4 # libcontainer
 | 
			
		||||
# libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh)
 | 
			
		||||
clone git github.com/coreos/go-systemd v3
 | 
			
		||||
clone git github.com/godbus/dbus v2
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -83,7 +83,7 @@ type data struct {
 | 
			
		|||
	pid    int
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *Manager) Apply(pid int) error {
 | 
			
		||||
func (m *Manager) Apply(pid int) (err error) {
 | 
			
		||||
	if m.Cgroups == nil {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -235,12 +235,12 @@ func getCgroupData(c *configs.Cgroup, pid int) (*data, error) {
 | 
			
		|||
	}, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (raw *data) parent(subsystem, mountpoint, src string) (string, error) {
 | 
			
		||||
	initPath, err := cgroups.GetInitCgroupDir(subsystem)
 | 
			
		||||
func (raw *data) parent(subsystem, mountpoint, root string) (string, error) {
 | 
			
		||||
	initPath, err := cgroups.GetThisCgroupDir(subsystem)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
	relDir, err := filepath.Rel(src, initPath)
 | 
			
		||||
	relDir, err := filepath.Rel(root, initPath)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -248,7 +248,7 @@ func (raw *data) parent(subsystem, mountpoint, src string) (string, error) {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
func (raw *data) path(subsystem string) (string, error) {
 | 
			
		||||
	mnt, src, err := cgroups.FindCgroupMountpointAndSource(subsystem)
 | 
			
		||||
	mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
 | 
			
		||||
	// If we didn't mount the subsystem, there is no point in making the path.
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
| 
						 | 
				
			
			@ -259,7 +259,7 @@ func (raw *data) path(subsystem string) (string, error) {
 | 
			
		|||
		return filepath.Join(raw.root, filepath.Base(mnt), raw.cgroup), nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	parent, err := raw.parent(subsystem, mnt, src)
 | 
			
		||||
	parent, err := raw.parent(subsystem, mnt, root)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -17,7 +17,7 @@ import (
 | 
			
		|||
type MemoryGroup struct {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *MemoryGroup) Apply(d *data) error {
 | 
			
		||||
func (s *MemoryGroup) Apply(d *data) (err error) {
 | 
			
		||||
	path, err := d.path("memory")
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		if cgroups.IsNotFound(err) {
 | 
			
		||||
| 
						 | 
				
			
			@ -28,21 +28,22 @@ func (s *MemoryGroup) Apply(d *data) error {
 | 
			
		|||
	if err := os.MkdirAll(path, 0755); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	defer func() {
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			os.RemoveAll(path)
 | 
			
		||||
		}
 | 
			
		||||
	}()
 | 
			
		||||
 | 
			
		||||
	if err := s.Set(path, d.c); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// We need to join memory cgroup after set memory limits, because
 | 
			
		||||
	// kmem.limit_in_bytes can only be set when the cgroup is empty.
 | 
			
		||||
	_, err = d.join("memory")
 | 
			
		||||
	if err != nil {
 | 
			
		||||
	if _, err = d.join("memory"); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	defer func() {
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			os.RemoveAll(path)
 | 
			
		||||
		}
 | 
			
		||||
	}()
 | 
			
		||||
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -21,6 +21,9 @@ const cgroupNamePrefix = "name="
 | 
			
		|||
 | 
			
		||||
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
 | 
			
		||||
func FindCgroupMountpoint(subsystem string) (string, error) {
 | 
			
		||||
	// We are not using mount.GetMounts() because it's super-inefficient,
 | 
			
		||||
	// parsing it directly sped up x10 times because of not using Sscanf.
 | 
			
		||||
	// It was one of two major performance drawbacks in container start.
 | 
			
		||||
	f, err := os.Open("/proc/self/mountinfo")
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
| 
						 | 
				
			
			@ -44,7 +47,7 @@ func FindCgroupMountpoint(subsystem string) (string, error) {
 | 
			
		|||
	return "", NewNotFoundError(subsystem)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func FindCgroupMountpointAndSource(subsystem string) (string, string, error) {
 | 
			
		||||
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
 | 
			
		||||
	f, err := os.Open("/proc/self/mountinfo")
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", "", err
 | 
			
		||||
| 
						 | 
				
			
			@ -69,15 +72,28 @@ func FindCgroupMountpointAndSource(subsystem string) (string, string, error) {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
func FindCgroupMountpointDir() (string, error) {
 | 
			
		||||
	mounts, err := mount.GetMounts()
 | 
			
		||||
	f, err := os.Open("/proc/self/mountinfo")
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
	defer f.Close()
 | 
			
		||||
 | 
			
		||||
	for _, mount := range mounts {
 | 
			
		||||
		if mount.Fstype == "cgroup" {
 | 
			
		||||
			return filepath.Dir(mount.Mountpoint), nil
 | 
			
		||||
	scanner := bufio.NewScanner(f)
 | 
			
		||||
	for scanner.Scan() {
 | 
			
		||||
		text := scanner.Text()
 | 
			
		||||
		fields := strings.Split(text, " ")
 | 
			
		||||
		// Safe as mountinfo encodes mountpoints with spaces as \040.
 | 
			
		||||
		index := strings.Index(text, " - ")
 | 
			
		||||
		postSeparatorFields := strings.Fields(text[index+3:])
 | 
			
		||||
		if len(postSeparatorFields) < 3 {
 | 
			
		||||
			return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
 | 
			
		||||
		}
 | 
			
		||||
		if postSeparatorFields[0] == "cgroup" {
 | 
			
		||||
			return filepath.Dir(fields[4]), nil
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if err := scanner.Err(); err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return "", NewNotFoundError("cgroup")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,5 +1,11 @@
 | 
			
		|||
package configs
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"encoding/json"
 | 
			
		||||
	"os/exec"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type Rlimit struct {
 | 
			
		||||
	Type int    `json:"type"`
 | 
			
		||||
	Hard uint64 `json:"hard"`
 | 
			
		||||
| 
						 | 
				
			
			@ -13,36 +19,46 @@ type IDMap struct {
 | 
			
		|||
	Size        int `json:"size"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Seccomp represents syscall restrictions
 | 
			
		||||
type Seccomp struct {
 | 
			
		||||
	Syscalls []*Syscall `json:"syscalls"`
 | 
			
		||||
	DefaultAction Action     `json:"default_action"`
 | 
			
		||||
	Syscalls      []*Syscall `json:"syscalls"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// An action to be taken upon rule match in Seccomp
 | 
			
		||||
type Action int
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	Kill Action = iota - 3
 | 
			
		||||
	Kill Action = iota - 4
 | 
			
		||||
	Errno
 | 
			
		||||
	Trap
 | 
			
		||||
	Allow
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// A comparison operator to be used when matching syscall arguments in Seccomp
 | 
			
		||||
type Operator int
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	EqualTo Operator = iota
 | 
			
		||||
	NotEqualTo
 | 
			
		||||
	GreatherThan
 | 
			
		||||
	GreaterThan
 | 
			
		||||
	GreaterThanOrEqualTo
 | 
			
		||||
	LessThan
 | 
			
		||||
	LessThanOrEqualTo
 | 
			
		||||
	MaskEqualTo
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// A rule to match a specific syscall argument in Seccomp
 | 
			
		||||
type Arg struct {
 | 
			
		||||
	Index int      `json:"index"`
 | 
			
		||||
	Value uint32   `json:"value"`
 | 
			
		||||
	Op    Operator `json:"op"`
 | 
			
		||||
	Index    uint     `json:"index"`
 | 
			
		||||
	Value    uint64   `json:"value"`
 | 
			
		||||
	ValueTwo uint64   `json:"value_two"`
 | 
			
		||||
	Op       Operator `json:"op"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// A rule to match a syscall in Seccomp
 | 
			
		||||
type Syscall struct {
 | 
			
		||||
	Value  int    `json:"value"`
 | 
			
		||||
	Name   string `json:"name"`
 | 
			
		||||
	Action Action `json:"action"`
 | 
			
		||||
	Args   []*Arg `json:"args"`
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -117,6 +133,12 @@ type Config struct {
 | 
			
		|||
	// If Rlimits are not set, the container will inherit rlimits from the parent process
 | 
			
		||||
	Rlimits []Rlimit `json:"rlimits"`
 | 
			
		||||
 | 
			
		||||
	// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
 | 
			
		||||
	// for a process. Valid values are in the range [-1000, 1000], where processes with
 | 
			
		||||
	// higher scores are preferred for being killed.
 | 
			
		||||
	// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
 | 
			
		||||
	OomScoreAdj int `json:"oom_score_adj"`
 | 
			
		||||
 | 
			
		||||
	// AdditionalGroups specifies the gids that should be added to supplementary groups
 | 
			
		||||
	// in addition to those that the user belongs to.
 | 
			
		||||
	AdditionalGroups []string `json:"additional_groups"`
 | 
			
		||||
| 
						 | 
				
			
			@ -140,7 +162,79 @@ type Config struct {
 | 
			
		|||
	Sysctl map[string]string `json:"sysctl"`
 | 
			
		||||
 | 
			
		||||
	// Seccomp allows actions to be taken whenever a syscall is made within the container.
 | 
			
		||||
	// By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno
 | 
			
		||||
	// can be specified on a per syscall basis.
 | 
			
		||||
	// A number of rules are given, each having an action to be taken if a syscall matches it.
 | 
			
		||||
	// A default action to be taken if no rules match is also given.
 | 
			
		||||
	Seccomp *Seccomp `json:"seccomp"`
 | 
			
		||||
 | 
			
		||||
	// Hooks are a collection of actions to perform at various container lifecycle events.
 | 
			
		||||
	// Hooks cannot be marshaled to JSON, but they do not need to be.
 | 
			
		||||
	Hooks *Hooks `json:"-"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type Hooks struct {
 | 
			
		||||
	// Prestart commands are executed after the container namespaces are created,
 | 
			
		||||
	// but before the user supplied command is executed from init.
 | 
			
		||||
	Prestart []Hook
 | 
			
		||||
 | 
			
		||||
	// Poststop commands are executed after the container init process exits.
 | 
			
		||||
	Poststop []Hook
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// HookState is the payload provided to a hook on execution.
 | 
			
		||||
type HookState struct {
 | 
			
		||||
	ID   string `json:"id"`
 | 
			
		||||
	Pid  int    `json:"pid"`
 | 
			
		||||
	Root string `json:"root"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type Hook interface {
 | 
			
		||||
	// Run executes the hook with the provided state.
 | 
			
		||||
	Run(HookState) error
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewFunctionHook will call the provided function when the hook is run.
 | 
			
		||||
func NewFunctionHook(f func(HookState) error) FuncHook {
 | 
			
		||||
	return FuncHook{
 | 
			
		||||
		run: f,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type FuncHook struct {
 | 
			
		||||
	run func(HookState) error
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (f FuncHook) Run(s HookState) error {
 | 
			
		||||
	return f.run(s)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type Command struct {
 | 
			
		||||
	Path string   `json:"path"`
 | 
			
		||||
	Args []string `json:"args"`
 | 
			
		||||
	Env  []string `json:"env"`
 | 
			
		||||
	Dir  string   `json:"dir"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewCommandHook will execute the provided command when the hook is run.
 | 
			
		||||
func NewCommandHook(cmd Command) CommandHook {
 | 
			
		||||
	return CommandHook{
 | 
			
		||||
		Command: cmd,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type CommandHook struct {
 | 
			
		||||
	Command
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (c Command) Run(s HookState) error {
 | 
			
		||||
	b, err := json.Marshal(s)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	cmd := exec.Cmd{
 | 
			
		||||
		Path:  c.Path,
 | 
			
		||||
		Args:  c.Args,
 | 
			
		||||
		Env:   c.Env,
 | 
			
		||||
		Stdin: bytes.NewReader(b),
 | 
			
		||||
	}
 | 
			
		||||
	return cmd.Run()
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -25,10 +25,3 @@ type Mount struct {
 | 
			
		|||
	// Optional Command to be run after Source is mounted.
 | 
			
		||||
	PostmountCmds []Command `json:"postmount_cmds"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type Command struct {
 | 
			
		||||
	Path string   `json:"path"`
 | 
			
		||||
	Args []string `json:"args"`
 | 
			
		||||
	Env  []string `json:"env"`
 | 
			
		||||
	Dir  string   `json:"dir"`
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -185,6 +185,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
 | 
			
		|||
		parentPipe: parentPipe,
 | 
			
		||||
		manager:    c.cgroupManager,
 | 
			
		||||
		config:     c.newInitConfig(p),
 | 
			
		||||
		container:  c,
 | 
			
		||||
	}, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -247,6 +248,17 @@ func (c *linuxContainer) Destroy() error {
 | 
			
		|||
		err = rerr
 | 
			
		||||
	}
 | 
			
		||||
	c.initProcess = nil
 | 
			
		||||
	if c.config.Hooks != nil {
 | 
			
		||||
		s := configs.HookState{
 | 
			
		||||
			ID:   c.id,
 | 
			
		||||
			Root: c.config.Rootfs,
 | 
			
		||||
		}
 | 
			
		||||
		for _, hook := range c.config.Hooks.Poststop {
 | 
			
		||||
			if err := hook.Run(s); err != nil {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -299,7 +311,7 @@ func (c *linuxContainer) checkCriuVersion() error {
 | 
			
		|||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const descriptors_filename = "descriptors.json"
 | 
			
		||||
const descriptorsFilename = "descriptors.json"
 | 
			
		||||
 | 
			
		||||
func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) {
 | 
			
		||||
	mountDest := m.Destination
 | 
			
		||||
| 
						 | 
				
			
			@ -406,7 +418,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
 | 
			
		|||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename), fdsJSON, 0655)
 | 
			
		||||
	err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -532,13 +544,19 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 | 
			
		|||
			break
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	for _, i := range criuOpts.VethPairs {
 | 
			
		||||
		veth := new(criurpc.CriuVethPair)
 | 
			
		||||
		veth.IfOut = proto.String(i.HostInterfaceName)
 | 
			
		||||
		veth.IfIn = proto.String(i.ContainerInterfaceName)
 | 
			
		||||
		req.Opts.Veths = append(req.Opts.Veths, veth)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	var (
 | 
			
		||||
		fds    []string
 | 
			
		||||
		fdJSON []byte
 | 
			
		||||
	)
 | 
			
		||||
 | 
			
		||||
	if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename)); err != nil {
 | 
			
		||||
	if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -568,6 +586,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
 | 
			
		|||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile())
 | 
			
		||||
	criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
 | 
			
		||||
	criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
 | 
			
		||||
	defer criuClient.Close()
 | 
			
		||||
| 
						 | 
				
			
			@ -631,7 +650,8 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
 | 
			
		|||
			return err
 | 
			
		||||
		}
 | 
			
		||||
		if !resp.GetSuccess() {
 | 
			
		||||
			return fmt.Errorf("criu failed: type %s errno %d", req.GetType().String(), resp.GetCrErrno())
 | 
			
		||||
			typeString := req.GetType().String()
 | 
			
		||||
			return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		t := resp.GetType()
 | 
			
		||||
| 
						 | 
				
			
			@ -671,7 +691,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
 | 
			
		|||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	if !st.Success() {
 | 
			
		||||
		return fmt.Errorf("criu failed: %s", st.String())
 | 
			
		||||
		return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,6 +5,11 @@ type CriuPageServerInfo struct {
 | 
			
		|||
	Port    int32  // port number of CRIU page server
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type VethPairName struct {
 | 
			
		||||
	ContainerInterfaceName string
 | 
			
		||||
	HostInterfaceName      string
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type CriuOpts struct {
 | 
			
		||||
	ImagesDirectory         string             // directory for storing image files
 | 
			
		||||
	WorkDirectory           string             // directory to cd and write logs/pidfiles/stats to
 | 
			
		||||
| 
						 | 
				
			
			@ -14,4 +19,5 @@ type CriuOpts struct {
 | 
			
		|||
	ShellJob                bool               // allow to dump and restore shell jobs
 | 
			
		||||
	FileLocks               bool               // handle file locks, for safety
 | 
			
		||||
	PageServer              CriuPageServerInfo // allow to dump to criu page server
 | 
			
		||||
	VethPairs               []VethPairName     // pass the veth to criu when restore
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,7 +5,9 @@ package libcontainer
 | 
			
		|||
import (
 | 
			
		||||
	"encoding/json"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"io/ioutil"
 | 
			
		||||
	"os"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"syscall"
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -13,7 +15,6 @@ import (
 | 
			
		|||
	"github.com/opencontainers/runc/libcontainer/cgroups"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/configs"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/netlink"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/seccomp"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/system"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/user"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/utils"
 | 
			
		||||
| 
						 | 
				
			
			@ -239,6 +240,11 @@ func setupRlimits(config *configs.Config) error {
 | 
			
		|||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func setOomScoreAdj(oomScoreAdj int) error {
 | 
			
		||||
	path := "/proc/self/oom_score_adj"
 | 
			
		||||
	return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0700)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// killCgroupProcesses freezes then iterates over all the processes inside the
 | 
			
		||||
// manager's cgroups sending a SIGKILL to each process then waiting for them to
 | 
			
		||||
// exit.
 | 
			
		||||
| 
						 | 
				
			
			@ -270,61 +276,3 @@ func killCgroupProcesses(m cgroups.Manager) error {
 | 
			
		|||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func finalizeSeccomp(config *initConfig) error {
 | 
			
		||||
	if config.Config.Seccomp == nil {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
	context := seccomp.New()
 | 
			
		||||
	for _, s := range config.Config.Seccomp.Syscalls {
 | 
			
		||||
		ss := &seccomp.Syscall{
 | 
			
		||||
			Value:  uint32(s.Value),
 | 
			
		||||
			Action: seccompAction(s.Action),
 | 
			
		||||
		}
 | 
			
		||||
		if len(s.Args) > 0 {
 | 
			
		||||
			ss.Args = seccompArgs(s.Args)
 | 
			
		||||
		}
 | 
			
		||||
		context.Add(ss)
 | 
			
		||||
	}
 | 
			
		||||
	return context.Load()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func seccompAction(a configs.Action) seccomp.Action {
 | 
			
		||||
	switch a {
 | 
			
		||||
	case configs.Kill:
 | 
			
		||||
		return seccomp.Kill
 | 
			
		||||
	case configs.Trap:
 | 
			
		||||
		return seccomp.Trap
 | 
			
		||||
	case configs.Allow:
 | 
			
		||||
		return seccomp.Allow
 | 
			
		||||
	}
 | 
			
		||||
	return seccomp.Error(syscall.Errno(int(a)))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func seccompArgs(args []*configs.Arg) seccomp.Args {
 | 
			
		||||
	var sa []seccomp.Arg
 | 
			
		||||
	for _, a := range args {
 | 
			
		||||
		sa = append(sa, seccomp.Arg{
 | 
			
		||||
			Index: uint32(a.Index),
 | 
			
		||||
			Op:    seccompOperator(a.Op),
 | 
			
		||||
			Value: uint(a.Value),
 | 
			
		||||
		})
 | 
			
		||||
	}
 | 
			
		||||
	return seccomp.Args{sa}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func seccompOperator(o configs.Operator) seccomp.Operator {
 | 
			
		||||
	switch o {
 | 
			
		||||
	case configs.EqualTo:
 | 
			
		||||
		return seccomp.EqualTo
 | 
			
		||||
	case configs.NotEqualTo:
 | 
			
		||||
		return seccomp.NotEqualTo
 | 
			
		||||
	case configs.GreatherThan:
 | 
			
		||||
		return seccomp.GreatherThan
 | 
			
		||||
	case configs.LessThan:
 | 
			
		||||
		return seccomp.LessThan
 | 
			
		||||
	case configs.MaskEqualTo:
 | 
			
		||||
		return seccomp.MaskEqualTo
 | 
			
		||||
	}
 | 
			
		||||
	return 0
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -29,7 +29,7 @@ func SetFileCreateLabel(fileLabel string) error {
 | 
			
		|||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Relabel(path string, fileLabel string, relabel string) error {
 | 
			
		||||
func Relabel(path string, fileLabel string, shared bool) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -59,3 +59,13 @@ func DupSecOpt(src string) []string {
 | 
			
		|||
func DisableSecOpt() []string {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Validate checks that the label does not include unexpected options
 | 
			
		||||
func Validate(label string) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// IsShared checks that the label includes a "shared" mark
 | 
			
		||||
func IsShared(label string) bool {
 | 
			
		||||
	return false
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -9,6 +9,8 @@ import (
 | 
			
		|||
	"github.com/opencontainers/runc/libcontainer/selinux"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
var ErrIncompatibleLabel = fmt.Errorf("Bad SELinux option z and Z can not be used together")
 | 
			
		||||
 | 
			
		||||
// InitLabels returns the process label and file labels to be used within
 | 
			
		||||
// the container.  A list of options can be passed into this function to alter
 | 
			
		||||
// the labels.  The labels returned will include a random MCS String, that is
 | 
			
		||||
| 
						 | 
				
			
			@ -95,28 +97,24 @@ func SetFileCreateLabel(fileLabel string) error {
 | 
			
		|||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Change the label of path to the filelabel string.  If the relabel string
 | 
			
		||||
// is "z", relabel will change the MCS label to s0.  This will allow all
 | 
			
		||||
// containers to share the content.  If the relabel string is a "Z" then
 | 
			
		||||
// the MCS label should continue to be used.  SELinux will use this field
 | 
			
		||||
// to make sure the content can not be shared by other containers.
 | 
			
		||||
func Relabel(path string, fileLabel string, relabel string) error {
 | 
			
		||||
	exclude_path := []string{"/", "/usr", "/etc"}
 | 
			
		||||
// Change the label of path to the filelabel string.
 | 
			
		||||
// It changes the MCS label to s0 if shared is true.
 | 
			
		||||
// This will allow all containers to share the content.
 | 
			
		||||
func Relabel(path string, fileLabel string, shared bool) error {
 | 
			
		||||
	if !selinux.SelinuxEnabled() {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if fileLabel == "" {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
	if !strings.ContainsAny(relabel, "zZ") {
 | 
			
		||||
		return nil
 | 
			
		||||
 | 
			
		||||
	exclude_paths := map[string]bool{"/": true, "/usr": true, "/etc": true}
 | 
			
		||||
	if exclude_paths[path] {
 | 
			
		||||
		return fmt.Errorf("Relabeling of %s is not allowed", path)
 | 
			
		||||
	}
 | 
			
		||||
	for _, p := range exclude_path {
 | 
			
		||||
		if path == p {
 | 
			
		||||
			return fmt.Errorf("Relabeling of %s is not allowed", path)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if strings.Contains(relabel, "z") && strings.Contains(relabel, "Z") {
 | 
			
		||||
		return fmt.Errorf("Bad SELinux option z and Z can not be used together")
 | 
			
		||||
	}
 | 
			
		||||
	if strings.Contains(relabel, "z") {
 | 
			
		||||
 | 
			
		||||
	if shared {
 | 
			
		||||
		c := selinux.NewContext(fileLabel)
 | 
			
		||||
		c["level"] = "s0"
 | 
			
		||||
		fileLabel = c.Get()
 | 
			
		||||
| 
						 | 
				
			
			@ -161,3 +159,16 @@ func DupSecOpt(src string) []string {
 | 
			
		|||
func DisableSecOpt() []string {
 | 
			
		||||
	return selinux.DisableSecOpt()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Validate checks that the label does not include unexpected options
 | 
			
		||||
func Validate(label string) error {
 | 
			
		||||
	if strings.Contains(label, "z") && strings.Contains(label, "Z") {
 | 
			
		||||
		return ErrIncompatibleLabel
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// IsShared checks that the label includes a "shared" mark
 | 
			
		||||
func IsShared(label string) bool {
 | 
			
		||||
	return strings.Contains(label, "z")
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,4 +1,4 @@
 | 
			
		|||
// +build arm ppc64
 | 
			
		||||
// +build arm ppc64 ppc64le
 | 
			
		||||
 | 
			
		||||
package netlink
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,4 +1,4 @@
 | 
			
		|||
// +build !arm,!ppc64
 | 
			
		||||
// +build !arm,!ppc64,!ppc64le
 | 
			
		||||
 | 
			
		||||
package netlink
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -13,6 +13,7 @@ import (
 | 
			
		|||
	"syscall"
 | 
			
		||||
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/cgroups"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/configs"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/system"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -138,11 +139,9 @@ func (p *setnsProcess) terminate() error {
 | 
			
		|||
 | 
			
		||||
func (p *setnsProcess) wait() (*os.ProcessState, error) {
 | 
			
		||||
	err := p.cmd.Wait()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return p.cmd.ProcessState, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return p.cmd.ProcessState, nil
 | 
			
		||||
	// Return actual ProcessState even on Wait error
 | 
			
		||||
	return p.cmd.ProcessState, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (p *setnsProcess) pid() int {
 | 
			
		||||
| 
						 | 
				
			
			@ -175,9 +174,9 @@ func (p *initProcess) externalDescriptors() []string {
 | 
			
		|||
	return p.fds
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (p *initProcess) start() error {
 | 
			
		||||
func (p *initProcess) start() (err error) {
 | 
			
		||||
	defer p.parentPipe.Close()
 | 
			
		||||
	err := p.cmd.Start()
 | 
			
		||||
	err = p.cmd.Start()
 | 
			
		||||
	p.childPipe.Close()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return newSystemError(err)
 | 
			
		||||
| 
						 | 
				
			
			@ -202,6 +201,18 @@ func (p *initProcess) start() error {
 | 
			
		|||
			p.manager.Destroy()
 | 
			
		||||
		}
 | 
			
		||||
	}()
 | 
			
		||||
	if p.config.Config.Hooks != nil {
 | 
			
		||||
		s := configs.HookState{
 | 
			
		||||
			ID:   p.container.id,
 | 
			
		||||
			Pid:  p.pid(),
 | 
			
		||||
			Root: p.config.Config.Rootfs,
 | 
			
		||||
		}
 | 
			
		||||
		for _, hook := range p.config.Config.Hooks.Prestart {
 | 
			
		||||
			if err := hook.Run(s); err != nil {
 | 
			
		||||
				return newSystemError(err)
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if err := p.createNetworkInterfaces(); err != nil {
 | 
			
		||||
		return newSystemError(err)
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -286,9 +297,7 @@ func (p *initProcess) setExternalDescriptors(newFds []string) {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
func getPipeFds(pid int) ([]string, error) {
 | 
			
		||||
	var fds []string
 | 
			
		||||
 | 
			
		||||
	fds = make([]string, 3)
 | 
			
		||||
	fds := make([]string, 3)
 | 
			
		||||
 | 
			
		||||
	dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
 | 
			
		||||
	for i := 0; i < 3; i++ {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -27,6 +27,8 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
 | 
			
		|||
	if err := prepareRoot(config); err != nil {
 | 
			
		||||
		return newSystemError(err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	setupDev := len(config.Devices) == 0
 | 
			
		||||
	for _, m := range config.Mounts {
 | 
			
		||||
		for _, precmd := range m.PremountCmds {
 | 
			
		||||
			if err := mountCmd(precmd); err != nil {
 | 
			
		||||
| 
						 | 
				
			
			@ -43,14 +45,16 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
 | 
			
		|||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if err := createDevices(config); err != nil {
 | 
			
		||||
		return newSystemError(err)
 | 
			
		||||
	}
 | 
			
		||||
	if err := setupPtmx(config, console); err != nil {
 | 
			
		||||
		return newSystemError(err)
 | 
			
		||||
	}
 | 
			
		||||
	if err := setupDevSymlinks(config.Rootfs); err != nil {
 | 
			
		||||
		return newSystemError(err)
 | 
			
		||||
	if !setupDev {
 | 
			
		||||
		if err := createDevices(config); err != nil {
 | 
			
		||||
			return newSystemError(err)
 | 
			
		||||
		}
 | 
			
		||||
		if err := setupPtmx(config, console); err != nil {
 | 
			
		||||
			return newSystemError(err)
 | 
			
		||||
		}
 | 
			
		||||
		if err := setupDevSymlinks(config.Rootfs); err != nil {
 | 
			
		||||
			return newSystemError(err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if err := syscall.Chdir(config.Rootfs); err != nil {
 | 
			
		||||
		return newSystemError(err)
 | 
			
		||||
| 
						 | 
				
			
			@ -63,8 +67,10 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
 | 
			
		|||
	if err != nil {
 | 
			
		||||
		return newSystemError(err)
 | 
			
		||||
	}
 | 
			
		||||
	if err := reOpenDevNull(config.Rootfs); err != nil {
 | 
			
		||||
		return newSystemError(err)
 | 
			
		||||
	if !setupDev {
 | 
			
		||||
		if err := reOpenDevNull(config.Rootfs); err != nil {
 | 
			
		||||
			return newSystemError(err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if config.Readonlyfs {
 | 
			
		||||
		if err := setReadonly(); err != nil {
 | 
			
		||||
| 
						 | 
				
			
			@ -131,6 +137,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 | 
			
		|||
			return err
 | 
			
		||||
		}
 | 
			
		||||
		return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
 | 
			
		||||
	case "securityfs":
 | 
			
		||||
		if err := os.MkdirAll(dest, 0755); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
		return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
 | 
			
		||||
	case "bind":
 | 
			
		||||
		stat, err := os.Stat(m.Source)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
| 
						 | 
				
			
			@ -160,7 +171,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 | 
			
		|||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if m.Relabel != "" {
 | 
			
		||||
			if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil {
 | 
			
		||||
			if err := label.Validate(m.Relabel); err != nil {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
			shared := label.IsShared(m.Relabel)
 | 
			
		||||
			if err := label.Relabel(m.Source, mountLabel, shared); err != nil {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,34 +0,0 @@
 | 
			
		|||
// +build linux
 | 
			
		||||
 | 
			
		||||
package seccomp
 | 
			
		||||
 | 
			
		||||
import "strings"
 | 
			
		||||
 | 
			
		||||
type bpfLabel struct {
 | 
			
		||||
	label    string
 | 
			
		||||
	location uint32
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type bpfLabels []bpfLabel
 | 
			
		||||
 | 
			
		||||
// labelIndex returns the index for the label if it exists in the slice.
 | 
			
		||||
// if it does not exist in the slice it appends the label lb to the end
 | 
			
		||||
// of the slice and returns the index.
 | 
			
		||||
func labelIndex(labels *bpfLabels, lb string) uint32 {
 | 
			
		||||
	var id uint32
 | 
			
		||||
	for id = 0; id < uint32(len(*labels)); id++ {
 | 
			
		||||
		if strings.EqualFold(lb, (*labels)[id].label) {
 | 
			
		||||
			return id
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	*labels = append(*labels, bpfLabel{lb, 0xffffffff})
 | 
			
		||||
	return id
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func scmpBpfStmt(code uint16, k uint32) sockFilter {
 | 
			
		||||
	return sockFilter{code, 0, 0, k}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter {
 | 
			
		||||
	return sockFilter{code, jt, jf, k}
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										53
									
								
								vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/config.go
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/config.go
									
										
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,53 @@
 | 
			
		|||
package seccomp
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/configs"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// ConvertStringToOperator converts a string into a Seccomp comparison operator.
 | 
			
		||||
// Comparison operators use the names they are assigned by Libseccomp's header.
 | 
			
		||||
// Attempting to convert a string that is not a valid operator results in an
 | 
			
		||||
// error.
 | 
			
		||||
func ConvertStringToOperator(in string) (configs.Operator, error) {
 | 
			
		||||
	switch in {
 | 
			
		||||
	case "SCMP_CMP_NE":
 | 
			
		||||
		return configs.NotEqualTo, nil
 | 
			
		||||
	case "SCMP_CMP_LT":
 | 
			
		||||
		return configs.LessThan, nil
 | 
			
		||||
	case "SCMP_CMP_LE":
 | 
			
		||||
		return configs.LessThanOrEqualTo, nil
 | 
			
		||||
	case "SCMP_CMP_EQ":
 | 
			
		||||
		return configs.EqualTo, nil
 | 
			
		||||
	case "SCMP_CMP_GE":
 | 
			
		||||
		return configs.GreaterThan, nil
 | 
			
		||||
	case "SCMP_CMP_GT":
 | 
			
		||||
		return configs.GreaterThanOrEqualTo, nil
 | 
			
		||||
	case "SCMP_CMP_MASKED_EQ":
 | 
			
		||||
		return configs.MaskEqualTo, nil
 | 
			
		||||
	default:
 | 
			
		||||
		return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// ConvertStringToAction converts a string into a Seccomp rule match action.
 | 
			
		||||
// Actions use the named they are assigned in Libseccomp's header, though some
 | 
			
		||||
// (notable, SCMP_ACT_TRACE) are not available in this implementation and will
 | 
			
		||||
// return errors.
 | 
			
		||||
// Attempting to convert a string that is not a valid action results in an
 | 
			
		||||
// error.
 | 
			
		||||
func ConvertStringToAction(in string) (configs.Action, error) {
 | 
			
		||||
	switch in {
 | 
			
		||||
	case "SCMP_ACT_KILL":
 | 
			
		||||
		return configs.Kill, nil
 | 
			
		||||
	case "SCMP_ACT_ERRNO":
 | 
			
		||||
		return configs.Errno, nil
 | 
			
		||||
	case "SCMP_ACT_TRAP":
 | 
			
		||||
		return configs.Trap, nil
 | 
			
		||||
	case "SCMP_ACT_ALLOW":
 | 
			
		||||
		return configs.Allow, nil
 | 
			
		||||
	default:
 | 
			
		||||
		return 0, fmt.Errorf("string %s is not a valid action for seccomp", in)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,146 +0,0 @@
 | 
			
		|||
// +build linux
 | 
			
		||||
 | 
			
		||||
package seccomp
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"errors"
 | 
			
		||||
	"syscall"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const labelTemplate = "lb-%d-%d"
 | 
			
		||||
 | 
			
		||||
// Action is the type of action that will be taken when a
 | 
			
		||||
// syscall is performed.
 | 
			
		||||
type Action int
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	Kill  Action = iota - 3 // Kill the calling process of the syscall.
 | 
			
		||||
	Trap                    // Trap and coredump the calling process of the syscall.
 | 
			
		||||
	Allow                   // Allow the syscall to be completed.
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Syscall is the specified syscall, action, and any type of arguments
 | 
			
		||||
// to filter on.
 | 
			
		||||
type Syscall struct {
 | 
			
		||||
	// Value is the syscall number.
 | 
			
		||||
	Value uint32
 | 
			
		||||
	// Action is the action to perform when the specified syscall is made.
 | 
			
		||||
	Action Action
 | 
			
		||||
	// Args are filters that can be specified on the arguments to the syscall.
 | 
			
		||||
	Args Args
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *Syscall) scmpAction() uint32 {
 | 
			
		||||
	switch s.Action {
 | 
			
		||||
	case Allow:
 | 
			
		||||
		return retAllow
 | 
			
		||||
	case Trap:
 | 
			
		||||
		return retTrap
 | 
			
		||||
	case Kill:
 | 
			
		||||
		return retKill
 | 
			
		||||
	}
 | 
			
		||||
	return actionErrno(uint32(s.Action))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Arg represents an argument to the syscall with the argument's index,
 | 
			
		||||
// the operator to apply when matching, and the argument's value at that time.
 | 
			
		||||
type Arg struct {
 | 
			
		||||
	Index uint32   // index of args which start from zero
 | 
			
		||||
	Op    Operator // operation, such as EQ/NE/GE/LE
 | 
			
		||||
	Value uint     // the value of arg
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type Args [][]Arg
 | 
			
		||||
 | 
			
		||||
var (
 | 
			
		||||
	ErrUnresolvedLabel      = errors.New("seccomp: unresolved label")
 | 
			
		||||
	ErrDuplicateLabel       = errors.New("seccomp: duplicate label use")
 | 
			
		||||
	ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument")
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Error returns an Action that will be used to send the calling
 | 
			
		||||
// process the specified errno when the syscall is made.
 | 
			
		||||
func Error(code syscall.Errno) Action {
 | 
			
		||||
	return Action(code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// New returns a new syscall context for use.
 | 
			
		||||
func New() *Context {
 | 
			
		||||
	return &Context{
 | 
			
		||||
		syscalls: make(map[uint32]*Syscall),
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Context holds syscalls for the current process to limit the type of
 | 
			
		||||
// actions the calling process can make.
 | 
			
		||||
type Context struct {
 | 
			
		||||
	syscalls map[uint32]*Syscall
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Add will add the specified syscall, action, and arguments to the seccomp
 | 
			
		||||
// Context.
 | 
			
		||||
func (c *Context) Add(s *Syscall) {
 | 
			
		||||
	c.syscalls[s.Value] = s
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Remove removes the specified syscall configuration from the Context.
 | 
			
		||||
func (c *Context) Remove(call uint32) {
 | 
			
		||||
	delete(c.syscalls, call)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Load will apply the Context to the calling process making any seccomp process changes
 | 
			
		||||
// apply after the context is loaded.
 | 
			
		||||
func (c *Context) Load() error {
 | 
			
		||||
	filter, err := c.newFilter()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	if err := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	prog := newSockFprog(filter)
 | 
			
		||||
	return prog.set()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (c *Context) newFilter() ([]sockFilter, error) {
 | 
			
		||||
	var (
 | 
			
		||||
		labels bpfLabels
 | 
			
		||||
		f      = newFilter()
 | 
			
		||||
	)
 | 
			
		||||
	for _, s := range c.syscalls {
 | 
			
		||||
		f.addSyscall(s, &labels)
 | 
			
		||||
	}
 | 
			
		||||
	f.allow()
 | 
			
		||||
	// process args for the syscalls
 | 
			
		||||
	for _, s := range c.syscalls {
 | 
			
		||||
		if err := f.addArguments(s, &labels); err != nil {
 | 
			
		||||
			return nil, err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	// apply labels for arguments
 | 
			
		||||
	idx := int32(len(*f) - 1)
 | 
			
		||||
	for ; idx >= 0; idx-- {
 | 
			
		||||
		lf := &(*f)[idx]
 | 
			
		||||
		if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		rel := int32(lf.jt)<<8 | int32(lf.jf)
 | 
			
		||||
		if ((jumpJT << 8) | jumpJF) == rel {
 | 
			
		||||
			if labels[lf.k].location == 0xffffffff {
 | 
			
		||||
				return nil, ErrUnresolvedLabel
 | 
			
		||||
			}
 | 
			
		||||
			lf.k = labels[lf.k].location - uint32(idx+1)
 | 
			
		||||
			lf.jt = 0
 | 
			
		||||
			lf.jf = 0
 | 
			
		||||
		} else if ((labelJT << 8) | labelJF) == rel {
 | 
			
		||||
			if labels[lf.k].location != 0xffffffff {
 | 
			
		||||
				return nil, ErrDuplicateLabel
 | 
			
		||||
			}
 | 
			
		||||
			labels[lf.k].location = uint32(idx)
 | 
			
		||||
			lf.k = 0
 | 
			
		||||
			lf.jt = 0
 | 
			
		||||
			lf.jf = 0
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return *f, nil
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,118 +0,0 @@
 | 
			
		|||
// +build linux
 | 
			
		||||
 | 
			
		||||
package seccomp
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"syscall"
 | 
			
		||||
	"unsafe"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type sockFilter struct {
 | 
			
		||||
	code uint16
 | 
			
		||||
	jt   uint8
 | 
			
		||||
	jf   uint8
 | 
			
		||||
	k    uint32
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func newFilter() *filter {
 | 
			
		||||
	var f filter
 | 
			
		||||
	f = append(f, sockFilter{
 | 
			
		||||
		pfLD + syscall.BPF_W + syscall.BPF_ABS,
 | 
			
		||||
		0,
 | 
			
		||||
		0,
 | 
			
		||||
		uint32(unsafe.Offsetof(secData.nr)),
 | 
			
		||||
	})
 | 
			
		||||
	return &f
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type filter []sockFilter
 | 
			
		||||
 | 
			
		||||
func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) {
 | 
			
		||||
	if len(s.Args) == 0 {
 | 
			
		||||
		f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()))
 | 
			
		||||
	} else {
 | 
			
		||||
		if len(s.Args[0]) > 0 {
 | 
			
		||||
			lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[0][0].Index)
 | 
			
		||||
			f.call(s.Value,
 | 
			
		||||
				scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
 | 
			
		||||
					jumpJT, jumpJF))
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error {
 | 
			
		||||
	for i := 0; len(s.Args) > i; i++ {
 | 
			
		||||
		if len(s.Args[i]) > 0 {
 | 
			
		||||
			lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index)
 | 
			
		||||
			f.label(labels, lb)
 | 
			
		||||
			f.arg(s.Args[i][0].Index)
 | 
			
		||||
		}
 | 
			
		||||
		for j := 0; j < len(s.Args[i]); j++ {
 | 
			
		||||
			var jf sockFilter
 | 
			
		||||
			if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 {
 | 
			
		||||
				lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index)
 | 
			
		||||
				jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA,
 | 
			
		||||
					labelIndex(labels, lbj), jumpJT, jumpJF)
 | 
			
		||||
			} else {
 | 
			
		||||
				jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())
 | 
			
		||||
			}
 | 
			
		||||
			if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		f.allow()
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (f *filter) label(labels *bpfLabels, lb string) {
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (f *filter) call(nr uint32, jt sockFilter) {
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1))
 | 
			
		||||
	*f = append(*f, jt)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (f *filter) allow() {
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (f *filter) deny() {
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (f *filter) arg(index uint32) {
 | 
			
		||||
	arg(f, index)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (f *filter) op(operation Operator, v uint, jf sockFilter) error {
 | 
			
		||||
	switch operation {
 | 
			
		||||
	case EqualTo:
 | 
			
		||||
		jumpEqualTo(f, v, jf)
 | 
			
		||||
	case NotEqualTo:
 | 
			
		||||
		jumpNotEqualTo(f, v, jf)
 | 
			
		||||
	case GreatherThan:
 | 
			
		||||
		jumpGreaterThan(f, v, jf)
 | 
			
		||||
	case LessThan:
 | 
			
		||||
		jumpLessThan(f, v, jf)
 | 
			
		||||
	case MaskEqualTo:
 | 
			
		||||
		jumpMaskEqualTo(f, v, jf)
 | 
			
		||||
	default:
 | 
			
		||||
		return ErrUnsupportedOperation
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func arg(f *filter, idx uint32) {
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx)))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 0))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx)))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 1))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func jump(f *filter, labels *bpfLabels, lb string) {
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
 | 
			
		||||
		jumpJT, jumpJF))
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,68 +0,0 @@
 | 
			
		|||
// +build linux,amd64
 | 
			
		||||
 | 
			
		||||
package seccomp
 | 
			
		||||
 | 
			
		||||
// Using BPF filters
 | 
			
		||||
//
 | 
			
		||||
// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf
 | 
			
		||||
import "syscall"
 | 
			
		||||
 | 
			
		||||
func jumpGreaterThan(f *filter, v uint, jt sockFilter) {
 | 
			
		||||
	lo := uint32(uint64(v) % 0x100000000)
 | 
			
		||||
	hi := uint32(uint64(v) / 0x100000000)
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 4, 0))
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, (lo), 0, 2))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
 | 
			
		||||
	*f = append(*f, jt)
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func jumpEqualTo(f *filter, v uint, jt sockFilter) {
 | 
			
		||||
	lo := uint32(uint64(v) % 0x100000000)
 | 
			
		||||
	hi := uint32(uint64(v) / 0x100000000)
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (lo), 0, 2))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
 | 
			
		||||
	*f = append(*f, jt)
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func jumpLessThan(f *filter, v uint, jt sockFilter) {
 | 
			
		||||
	lo := uint32(uint64(v) % 0x100000000)
 | 
			
		||||
	hi := uint32(uint64(v) / 0x100000000)
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 6, 0))
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 3))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (lo), 2, 0))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
 | 
			
		||||
	*f = append(*f, jt)
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func jumpNotEqualTo(f *filter, v uint, jt sockFilter) {
 | 
			
		||||
	lo := uint32(uint64(v) % 0x100000000)
 | 
			
		||||
	hi := uint32(uint64(v) / 0x100000000)
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 5, 0))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
 | 
			
		||||
	*f = append(*f, jt)
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// this checks for a value inside a mask. The evaluation is equal to doing
 | 
			
		||||
// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER
 | 
			
		||||
func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) {
 | 
			
		||||
	lo := uint32(uint64(v) % 0x100000000)
 | 
			
		||||
	hi := uint32(uint64(v) / 0x100000000)
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v)))
 | 
			
		||||
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2))
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
 | 
			
		||||
	*f = append(*f, jt)
 | 
			
		||||
	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										165
									
								
								vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										165
									
								
								vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
									
										
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,165 @@
 | 
			
		|||
// +build linux,cgo,seccomp
 | 
			
		||||
 | 
			
		||||
package seccomp
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"log"
 | 
			
		||||
	"syscall"
 | 
			
		||||
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/configs"
 | 
			
		||||
	libseccomp "github.com/seccomp/libseccomp-golang"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
var (
 | 
			
		||||
	actAllow = libseccomp.ActAllow
 | 
			
		||||
	actTrap  = libseccomp.ActTrap
 | 
			
		||||
	actKill  = libseccomp.ActKill
 | 
			
		||||
	actErrno = libseccomp.ActErrno.SetReturnCode(int16(syscall.EPERM))
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Filters given syscalls in a container, preventing them from being used
 | 
			
		||||
// Started in the container init process, and carried over to all child processes
 | 
			
		||||
// Setns calls, however, require a separate invocation, as they are not children
 | 
			
		||||
// of the init until they join the namespace
 | 
			
		||||
func InitSeccomp(config *configs.Seccomp) error {
 | 
			
		||||
	if config == nil {
 | 
			
		||||
		return fmt.Errorf("cannot initialize Seccomp - nil config passed")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	defaultAction, err := getAction(config.DefaultAction)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return fmt.Errorf("error initializing seccomp - invalid default action")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	filter, err := libseccomp.NewFilter(defaultAction)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return fmt.Errorf("error creating filter: %s", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Unset no new privs bit
 | 
			
		||||
	if err := filter.SetNoNewPrivsBit(false); err != nil {
 | 
			
		||||
		return fmt.Errorf("error setting no new privileges: %s", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Add a rule for each syscall
 | 
			
		||||
	for _, call := range config.Syscalls {
 | 
			
		||||
		if call == nil {
 | 
			
		||||
			return fmt.Errorf("encountered nil syscall while initializing Seccomp")
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if err = matchCall(filter, call); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if err = filter.Load(); err != nil {
 | 
			
		||||
		return fmt.Errorf("error loading seccomp filter into kernel: %s", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Convert Libcontainer Action to Libseccomp ScmpAction
 | 
			
		||||
func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
 | 
			
		||||
	switch act {
 | 
			
		||||
	case configs.Kill:
 | 
			
		||||
		return actKill, nil
 | 
			
		||||
	case configs.Errno:
 | 
			
		||||
		return actErrno, nil
 | 
			
		||||
	case configs.Trap:
 | 
			
		||||
		return actTrap, nil
 | 
			
		||||
	case configs.Allow:
 | 
			
		||||
		return actAllow, nil
 | 
			
		||||
	default:
 | 
			
		||||
		return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule")
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Convert Libcontainer Operator to Libseccomp ScmpCompareOp
 | 
			
		||||
func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
 | 
			
		||||
	switch op {
 | 
			
		||||
	case configs.EqualTo:
 | 
			
		||||
		return libseccomp.CompareEqual, nil
 | 
			
		||||
	case configs.NotEqualTo:
 | 
			
		||||
		return libseccomp.CompareNotEqual, nil
 | 
			
		||||
	case configs.GreaterThan:
 | 
			
		||||
		return libseccomp.CompareGreater, nil
 | 
			
		||||
	case configs.GreaterThanOrEqualTo:
 | 
			
		||||
		return libseccomp.CompareGreaterEqual, nil
 | 
			
		||||
	case configs.LessThan:
 | 
			
		||||
		return libseccomp.CompareLess, nil
 | 
			
		||||
	case configs.LessThanOrEqualTo:
 | 
			
		||||
		return libseccomp.CompareLessOrEqual, nil
 | 
			
		||||
	case configs.MaskEqualTo:
 | 
			
		||||
		return libseccomp.CompareMaskedEqual, nil
 | 
			
		||||
	default:
 | 
			
		||||
		return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule")
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Convert Libcontainer Arg to Libseccomp ScmpCondition
 | 
			
		||||
func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
 | 
			
		||||
	cond := libseccomp.ScmpCondition{}
 | 
			
		||||
 | 
			
		||||
	if arg == nil {
 | 
			
		||||
		return cond, fmt.Errorf("cannot convert nil to syscall condition")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	op, err := getOperator(arg.Op)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return cond, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Add a rule to match a single syscall
 | 
			
		||||
func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
 | 
			
		||||
	if call == nil || filter == nil {
 | 
			
		||||
		return fmt.Errorf("cannot use nil as syscall to block")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(call.Name) == 0 {
 | 
			
		||||
		return fmt.Errorf("empty string is not a valid syscall")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// If we can't resolve the syscall, assume it's not supported on this kernel
 | 
			
		||||
	// Ignore it, don't error out
 | 
			
		||||
	callNum, err := libseccomp.GetSyscallFromName(call.Name)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		log.Printf("Error resolving syscall name %s: %s - ignoring syscall.", call.Name, err)
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Convert the call's action to the libseccomp equivalent
 | 
			
		||||
	callAct, err := getAction(call.Action)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Unconditional match - just add the rule
 | 
			
		||||
	if len(call.Args) == 0 {
 | 
			
		||||
		if err = filter.AddRule(callNum, callAct); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		// Conditional match - convert the per-arg rules into library format
 | 
			
		||||
		conditions := []libseccomp.ScmpCondition{}
 | 
			
		||||
 | 
			
		||||
		for _, cond := range call.Args {
 | 
			
		||||
			newCond, err := getCondition(cond)
 | 
			
		||||
			if err != nil {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			conditions = append(conditions, newCond)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,124 +0,0 @@
 | 
			
		|||
// +build linux
 | 
			
		||||
 | 
			
		||||
// Package seccomp provides native seccomp (https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt) support for Go.
 | 
			
		||||
package seccomp
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"syscall"
 | 
			
		||||
	"unsafe"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Operator identifies the comparison applied to a syscall argument.
type Operator int

// Comparison operators, numbered from zero in declaration order.
const (
	// EqualTo is the first operator (value 0).
	EqualTo Operator = iota
	// NotEqualTo has value 1.
	NotEqualTo
	// GreatherThan has value 2. The name is misspelled; it is kept so that
	// existing callers continue to compile. Prefer GreaterThan.
	GreatherThan
	// LessThan has value 3.
	LessThan
	// MaskEqualTo has value 4.
	MaskEqualTo
)

// GreaterThan is the correctly spelled name for GreatherThan; the two
// constants have the same type and value.
const GreaterThan = GreatherThan
 | 
			
		||||
 | 
			
		||||
// Sentinel byte values, paired by kind: both jump markers are 0xff and both
// label markers are 0xfe.
// NOTE(review): the JT/JF suffixes suggest jump-if-true / jump-if-false
// placeholders for filter instructions; the code that consumes them is not
// part of this section, so confirm against the filter assembler.
const (
	jumpJT, jumpJF   = 0xff, 0xff
	labelJT, labelJF = 0xfe, 0xfe
)
 | 
			
		||||
 | 
			
		||||
// Numeric constants used when building and installing a seccomp filter.
// NOTE(review): the values line up with the kernel's BPF_LD,
// SECCOMP_RET_KILL/TRAP/ALLOW, SECCOMP_MODE_FILTER and PR_SET_NO_NEW_PRIVS
// definitions; confirm against the kernel headers before relying on that.
const (
	// Instruction class for loads.
	pfLD = 0x0

	// Filter return values.
	retKill  = 0x00000000
	retTrap  = 0x00030000
	retAllow = 0x7fff0000

	// Mode argument passed with PR_SET_SECCOMP by sockFprog.set.
	modeFilter = 0x2

	// prctl option number.
	prSetNoNewPrivileges = 0x26
)
 | 
			
		||||
 | 
			
		||||
// actionErrno combines the low 16 bits of errno with the action code
// 0x00050000 and returns the result. Bits of errno above the low 16 are
// discarded.
// NOTE(review): 0x00050000 presumably is the kernel's SECCOMP_RET_ERRNO
// action; confirm against the kernel headers.
func actionErrno(errno uint32) uint32 {
	const (
		errnoAction = 0x00050000
		dataMask    = 0x0000ffff
	)
	payload := errno & dataMask
	return errnoAction | payload
}
 | 
			
		||||
 | 
			
		||||
// secData is a zero-valued record whose field layout is consulted through
// unsafe.Offsetof / unsafe.Sizeof by the endianSupport helpers to compute byte
// offsets of syscall arguments.
// NOTE(review): the fields appear to mirror the kernel's struct seccomp_data;
// confirm before changing field order or types.
var secData struct {
	nr         int32
	arch       uint32
	insPointer uint64
	args       [6]uint64
}
 | 
			
		||||
 | 
			
		||||
// isLittle is computed once at package initialization: it stores 0x1234 in an
// int, views that int as raw bytes, and reports whether the byte at the lowest
// address is non-zero. That byte is 0x34 when the least significant byte comes
// first (little-endian) and zero otherwise.
var isLittle = func() bool {
	probe := 0x1234
	raw := (*[unsafe.Sizeof(0)]byte)(unsafe.Pointer(&probe))
	return raw[0] != 0
}()
 | 
			
		||||
 | 
			
		||||
// endianSupport is a stateless receiver for the hi/low/big/little helpers that
// compute byte offsets into secData.args.
type endianSupport struct{}

// endian is the package-level endianSupport value.
var endian endianSupport
 | 
			
		||||
 | 
			
		||||
func (e endianSupport) hi(i uint32) uint32 {
 | 
			
		||||
	if isLittle {
 | 
			
		||||
		return e.little(i)
 | 
			
		||||
	}
 | 
			
		||||
	return e.big(i)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (e endianSupport) low(i uint32) uint32 {
 | 
			
		||||
	if isLittle {
 | 
			
		||||
		return e.big(i)
 | 
			
		||||
	}
 | 
			
		||||
	return e.little(i)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (endianSupport) big(idx uint32) uint32 {
 | 
			
		||||
	if idx >= 6 {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	return uint32(unsafe.Offsetof(secData.args)) + 8*idx
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (endianSupport) little(idx uint32) uint32 {
 | 
			
		||||
	if idx < 0 || idx >= 6 {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	return uint32(unsafe.Offsetof(secData.args)) +
 | 
			
		||||
		uint32(unsafe.Alignof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// prctl issues the prctl system call with the given option and four
// arguments (the sixth raw syscall slot is passed as 0). It returns the
// syscall's errno as an error when that errno is non-zero, and nil otherwise.
func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error {
	if _, _, errno := syscall.Syscall6(
		syscall.SYS_PRCTL,
		uintptr(option),
		arg2,
		arg3,
		arg4,
		arg5,
		0,
	); errno != 0 {
		return errno
	}
	return nil
}
 | 
			
		||||
 | 
			
		||||
func newSockFprog(filter []sockFilter) *sockFprog {
 | 
			
		||||
	return &sockFprog{
 | 
			
		||||
		len:  uint16(len(filter)),
 | 
			
		||||
		filt: filter,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type sockFprog struct {
 | 
			
		||||
	len  uint16
 | 
			
		||||
	filt []sockFilter
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *sockFprog) set() error {
 | 
			
		||||
	_, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP),
 | 
			
		||||
		uintptr(modeFilter), uintptr(unsafe.Pointer(s)))
 | 
			
		||||
	if err != 0 {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,3 +1,19 @@
 | 
			
		|||
// +build !linux
 | 
			
		||||
// +build !linux !cgo !seccomp
 | 
			
		||||
 | 
			
		||||
package seccomp
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"errors"
 | 
			
		||||
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/configs"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// ErrSeccompNotEnabled is returned by InitSeccomp when a seccomp configuration
// is supplied to a build that has no seccomp support.
var (
	ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported")
)
 | 
			
		||||
 | 
			
		||||
// Seccomp not supported, do nothing
 | 
			
		||||
func InitSeccomp(config *configs.Seccomp) error {
 | 
			
		||||
	if config != nil {
 | 
			
		||||
		return ErrSeccompNotEnabled
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,6 +7,7 @@ import (
 | 
			
		|||
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/apparmor"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/label"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/seccomp"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/system"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -20,6 +21,14 @@ func (l *linuxSetnsInit) Init() error {
 | 
			
		|||
	if err := setupRlimits(l.config.Config); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	if l.config.Config.Seccomp != nil {
 | 
			
		||||
		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if err := finalizeNamespace(l.config); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -9,6 +9,7 @@ import (
 | 
			
		|||
	"github.com/opencontainers/runc/libcontainer/apparmor"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/configs"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/label"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/seccomp"
 | 
			
		||||
	"github.com/opencontainers/runc/libcontainer/system"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -46,6 +47,10 @@ func (l *linuxStandardInit) Init() error {
 | 
			
		|||
	if err := setupRlimits(l.config.Config); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	label.Init()
 | 
			
		||||
	// InitializeMountNamespace() can be executed only for a new mount namespace
 | 
			
		||||
	if l.config.Config.Namespaces.Contains(configs.NEWNS) {
 | 
			
		||||
| 
						 | 
				
			
			@ -85,6 +90,11 @@ func (l *linuxStandardInit) Init() error {
 | 
			
		|||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	if l.config.Config.Seccomp != nil {
 | 
			
		||||
		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if err := finalizeNamespace(l.config); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -99,8 +109,5 @@ func (l *linuxStandardInit) Init() error {
 | 
			
		|||
	if syscall.Getppid() != l.parentPid {
 | 
			
		||||
		return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
 | 
			
		||||
	}
 | 
			
		||||
	if err := finalizeSeccomp(l.config); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue