diff --git a/daemon/container_unix.go b/daemon/container_unix.go index 86155d0e01..4eca92ed30 100644 --- a/daemon/container_unix.go +++ b/daemon/container_unix.go @@ -1112,12 +1112,9 @@ func (container *Container) unmountVolumes(forceSyscall bool) error { func (container *Container) networkMounts() []execdriver.Mount { var mounts []execdriver.Mount - mode := "Z" - if container.hostConfig.NetworkMode.IsContainer() { - mode = "z" - } + shared := container.hostConfig.NetworkMode.IsContainer() if container.ResolvConfPath != "" { - label.Relabel(container.ResolvConfPath, container.MountLabel, mode) + label.Relabel(container.ResolvConfPath, container.MountLabel, shared) writable := !container.hostConfig.ReadonlyRootfs if m, exists := container.MountPoints["/etc/resolv.conf"]; exists { writable = m.RW @@ -1130,7 +1127,7 @@ func (container *Container) networkMounts() []execdriver.Mount { }) } if container.HostnamePath != "" { - label.Relabel(container.HostnamePath, container.MountLabel, mode) + label.Relabel(container.HostnamePath, container.MountLabel, shared) writable := !container.hostConfig.ReadonlyRootfs if m, exists := container.MountPoints["/etc/hostname"]; exists { writable = m.RW @@ -1143,7 +1140,7 @@ func (container *Container) networkMounts() []execdriver.Mount { }) } if container.HostsPath != "" { - label.Relabel(container.HostsPath, container.MountLabel, mode) + label.Relabel(container.HostsPath, container.MountLabel, shared) writable := !container.hostConfig.ReadonlyRootfs if m, exists := container.MountPoints["/etc/hosts"]; exists { writable = m.RW diff --git a/daemon/create_unix.go b/daemon/create_unix.go index 8d5137b789..a0b45e02c3 100644 --- a/daemon/create_unix.go +++ b/daemon/create_unix.go @@ -59,7 +59,7 @@ func createContainerPlatformSpecificSettings(container *Container, config *runco return err } - if err := label.Relabel(v.Path(), container.MountLabel, "z"); err != nil { + if err := label.Relabel(v.Path(), container.MountLabel, true); err != nil { 
return err } diff --git a/daemon/volumes_unix.go b/daemon/volumes_unix.go index 5d8bb5fd8d..1670e0fe0e 100644 --- a/daemon/volumes_unix.go +++ b/daemon/volumes_unix.go @@ -355,7 +355,8 @@ func (daemon *Daemon) registerMountPoints(container *Container, hostConfig *runc } } - if err := label.Relabel(bind.Source, container.MountLabel, bind.Mode); err != nil { + shared := label.IsShared(bind.Mode) + if err := label.Relabel(bind.Source, container.MountLabel, shared); err != nil { return err } binds[bind.Destination] = true diff --git a/hack/vendor.sh b/hack/vendor.sh index ac379015f4..b84f1b14b1 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -42,7 +42,7 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16 clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c -clone git github.com/opencontainers/runc v0.0.3 # libcontainer +clone git github.com/opencontainers/runc v0.0.4 # libcontainer # libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh) clone git github.com/coreos/go-systemd v3 clone git github.com/godbus/dbus v2 diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go index f75dab1be8..815f08253b 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go @@ -83,7 +83,7 @@ type data struct { pid int } -func (m *Manager) Apply(pid int) error { +func (m *Manager) Apply(pid int) (err error) { if m.Cgroups == nil { return nil } @@ -235,12 +235,12 @@ func getCgroupData(c *configs.Cgroup, pid int) (*data, error) { }, nil } -func (raw *data) parent(subsystem, mountpoint, src string) (string, error) { - initPath, err := cgroups.GetInitCgroupDir(subsystem) +func (raw *data) parent(subsystem, 
mountpoint, root string) (string, error) { + initPath, err := cgroups.GetThisCgroupDir(subsystem) if err != nil { return "", err } - relDir, err := filepath.Rel(src, initPath) + relDir, err := filepath.Rel(root, initPath) if err != nil { return "", err } @@ -248,7 +248,7 @@ func (raw *data) parent(subsystem, mountpoint, src string) (string, error) { } func (raw *data) path(subsystem string) (string, error) { - mnt, src, err := cgroups.FindCgroupMountpointAndSource(subsystem) + mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem) // If we didn't mount the subsystem, there is no point we make the path. if err != nil { return "", err @@ -259,7 +259,7 @@ func (raw *data) path(subsystem string) (string, error) { return filepath.Join(raw.root, filepath.Base(mnt), raw.cgroup), nil } - parent, err := raw.parent(subsystem, mnt, src) + parent, err := raw.parent(subsystem, mnt, root) if err != nil { return "", err } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go index 1fa8bd74c1..23c68f8372 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go @@ -17,7 +17,7 @@ import ( type MemoryGroup struct { } -func (s *MemoryGroup) Apply(d *data) error { +func (s *MemoryGroup) Apply(d *data) (err error) { path, err := d.path("memory") if err != nil { if cgroups.IsNotFound(err) { @@ -28,21 +28,22 @@ func (s *MemoryGroup) Apply(d *data) error { if err := os.MkdirAll(path, 0755); err != nil { return err } + + defer func() { + if err != nil { + os.RemoveAll(path) + } + }() + if err := s.Set(path, d.c); err != nil { return err } // We need to join memory cgroup after set memory limits, because // kmem.limit_in_bytes can only be set when the cgroup is empty. 
- _, err = d.join("memory") - if err != nil { + if _, err = d.join("memory"); err != nil { return err } - defer func() { - if err != nil { - os.RemoveAll(path) - } - }() return nil } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go index 1d0cb502cd..3a182684f1 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -21,6 +21,9 @@ const cgroupNamePrefix = "name=" // https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt func FindCgroupMountpoint(subsystem string) (string, error) { + // We are not using mount.GetMounts() because it's super-inefficient, + // parsing it directly sped up x10 times because of not using Sscanf. + // It was one of two major performance drawbacks in container start. f, err := os.Open("/proc/self/mountinfo") if err != nil { return "", err @@ -44,7 +47,7 @@ func FindCgroupMountpoint(subsystem string) (string, error) { return "", NewNotFoundError(subsystem) } -func FindCgroupMountpointAndSource(subsystem string) (string, string, error) { +func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) { f, err := os.Open("/proc/self/mountinfo") if err != nil { return "", "", err @@ -69,15 +72,28 @@ func FindCgroupMountpointAndSource(subsystem string) (string, string, error) { } func FindCgroupMountpointDir() (string, error) { - mounts, err := mount.GetMounts() + f, err := os.Open("/proc/self/mountinfo") if err != nil { return "", err } + defer f.Close() - for _, mount := range mounts { - if mount.Fstype == "cgroup" { - return filepath.Dir(mount.Mountpoint), nil + scanner := bufio.NewScanner(f) + for scanner.Scan() { + text := scanner.Text() + fields := strings.Split(text, " ") + // Safe as mountinfo encodes mountpoints with spaces as \040. 
+ index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + if len(postSeparatorFields) < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) } + if postSeparatorFields[0] == "cgroup" { + return filepath.Dir(fields[4]), nil + } + } + if err := scanner.Err(); err != nil { + return "", err } return "", NewNotFoundError("cgroup") diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go index 83381c84c2..f26287608a 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -1,5 +1,11 @@ package configs +import ( + "bytes" + "encoding/json" + "os/exec" +) + type Rlimit struct { Type int `json:"type"` Hard uint64 `json:"hard"` @@ -13,36 +19,46 @@ type IDMap struct { Size int `json:"size"` } +// Seccomp represents syscall restrictions type Seccomp struct { - Syscalls []*Syscall `json:"syscalls"` + DefaultAction Action `json:"default_action"` + Syscalls []*Syscall `json:"syscalls"` } +// An action to be taken upon rule match in Seccomp type Action int const ( - Kill Action = iota - 3 + Kill Action = iota - 4 + Errno Trap Allow ) +// A comparison operator to be used when matching syscall arguments in Seccomp type Operator int const ( EqualTo Operator = iota NotEqualTo - GreatherThan + GreaterThan + GreaterThanOrEqualTo LessThan + LessThanOrEqualTo MaskEqualTo ) +// A rule to match a specific syscall argument in Seccomp type Arg struct { - Index int `json:"index"` - Value uint32 `json:"value"` - Op Operator `json:"op"` + Index uint `json:"index"` + Value uint64 `json:"value"` + ValueTwo uint64 `json:"value_two"` + Op Operator `json:"op"` } +// An rule to match a syscall in Seccomp type Syscall struct { - Value int `json:"value"` + Name string `json:"name"` Action Action `json:"action"` Args []*Arg 
`json:"args"` } @@ -117,6 +133,12 @@ type Config struct { // If Rlimits are not set, the container will inherit rlimits from the parent process Rlimits []Rlimit `json:"rlimits"` + // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores + // for a process. Valid values are between the range [-1000, '1000'], where processes with + // higher scores are preferred for being killed. + // More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ + OomScoreAdj int `json:"oom_score_adj"` + // AdditionalGroups specifies the gids that should be added to supplementary groups // in addition to those that the user belongs to. AdditionalGroups []string `json:"additional_groups"` @@ -140,7 +162,79 @@ type Config struct { Sysctl map[string]string `json:"sysctl"` // Seccomp allows actions to be taken whenever a syscall is made within the container. - // By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno - // can be specified on a per syscall basis. + // A number of rules are given, each having an action to be taken if a syscall matches it. + // A default action to be taken if no rules match is also given. Seccomp *Seccomp `json:"seccomp"` + + // Hooks are a collection of actions to perform at various container lifecycle events. + // Hooks are not able to be marshaled to json but they are also not needed to. + Hooks *Hooks `json:"-"` +} + +type Hooks struct { + // Prestart commands are executed after the container namespaces are created, + // but before the user supplied command is executed from init. + Prestart []Hook + + // Poststop commands are executed after the container init process exits. + Poststop []Hook +} + +// HookState is the payload provided to a hook on execution. +type HookState struct { + ID string `json:"id"` + Pid int `json:"pid"` + Root string `json:"root"` +} + +type Hook interface { + // Run executes the hook with the provided state. 
+ Run(HookState) error +} + +// NewFunctionHooks will call the provided function when the hook is run. +func NewFunctionHook(f func(HookState) error) FuncHook { + return FuncHook{ + run: f, + } +} + +type FuncHook struct { + run func(HookState) error +} + +func (f FuncHook) Run(s HookState) error { + return f.run(s) +} + +type Command struct { + Path string `json:"path"` + Args []string `json:"args"` + Env []string `json:"env"` + Dir string `json:"dir"` +} + +// NewCommandHooks will execute the provided command when the hook is run. +func NewCommandHook(cmd Command) CommandHook { + return CommandHook{ + Command: cmd, + } +} + +type CommandHook struct { + Command +} + +func (c Command) Run(s HookState) error { + b, err := json.Marshal(s) + if err != nil { + return err + } + cmd := exec.Cmd{ + Path: c.Path, + Args: c.Args, + Env: c.Env, + Stdin: bytes.NewReader(b), + } + return cmd.Run() } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/mount.go b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/mount.go index 5a69f815e4..50668f04fc 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/mount.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/mount.go @@ -25,10 +25,3 @@ type Mount struct { // Optional Command to be run after Source is mounted. 
PostmountCmds []Command `json:"postmount_cmds"` } - -type Command struct { - Path string `json:"path"` - Args []string `json:"args"` - Env []string `json:"env"` - Dir string `json:"dir"` -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go index 9a27eb432f..574773bf51 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go @@ -185,6 +185,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c parentPipe: parentPipe, manager: c.cgroupManager, config: c.newInitConfig(p), + container: c, }, nil } @@ -247,6 +248,17 @@ func (c *linuxContainer) Destroy() error { err = rerr } c.initProcess = nil + if c.config.Hooks != nil { + s := configs.HookState{ + ID: c.id, + Root: c.config.Rootfs, + } + for _, hook := range c.config.Hooks.Poststop { + if err := hook.Run(s); err != nil { + return err + } + } + } return err } @@ -299,7 +311,7 @@ func (c *linuxContainer) checkCriuVersion() error { return nil } -const descriptors_filename = "descriptors.json" +const descriptorsFilename = "descriptors.json" func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) { mountDest := m.Destination @@ -406,7 +418,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { return err } - err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename), fdsJSON, 0655) + err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655) if err != nil { return err } @@ -532,13 +544,19 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { break } } + for _, i := range criuOpts.VethPairs { + veth := new(criurpc.CriuVethPair) + veth.IfOut = proto.String(i.HostInterfaceName) + veth.IfIn = proto.String(i.ContainerInterfaceName) + 
req.Opts.Veths = append(req.Opts.Veths, veth) + } var ( fds []string fdJSON []byte ) - if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename)); err != nil { + if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil { return err } @@ -568,6 +586,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * return err } + logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile()) criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client") criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server") defer criuClient.Close() @@ -631,7 +650,8 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * return err } if !resp.GetSuccess() { - return fmt.Errorf("criu failed: type %s errno %d", req.GetType().String(), resp.GetCrErrno()) + typeString := req.GetType().String() + return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath) } t := resp.GetType() @@ -671,7 +691,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * return err } if !st.Success() { - return fmt.Errorf("criu failed: %s", st.String()) + return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath) } return nil } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go b/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go index bca81672ea..794d5bd54f 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go @@ -5,6 +5,11 @@ type CriuPageServerInfo struct { Port int32 // port number of CRIU page server } +type VethPairName struct { + ContainerInterfaceName string + HostInterfaceName string +} + type CriuOpts struct { ImagesDirectory string // directory for storing image files WorkDirectory string // directory to cd and 
write logs/pidfiles/stats to @@ -14,4 +19,5 @@ type CriuOpts struct { ShellJob bool // allow to dump and restore shell jobs FileLocks bool // handle file locks, for safety PageServer CriuPageServerInfo // allow to dump to criu page server + VethPairs []VethPairName // pass the veth to criu when restore } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/init_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/init_linux.go index fd124f6de0..6854a2d902 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/init_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/init_linux.go @@ -5,7 +5,9 @@ package libcontainer import ( "encoding/json" "fmt" + "io/ioutil" "os" + "strconv" "strings" "syscall" @@ -13,7 +15,6 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/netlink" - "github.com/opencontainers/runc/libcontainer/seccomp" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/user" "github.com/opencontainers/runc/libcontainer/utils" @@ -239,6 +240,11 @@ func setupRlimits(config *configs.Config) error { return nil } +func setOomScoreAdj(oomScoreAdj int) error { + path := "/proc/self/oom_score_adj" + return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0700) +} + // killCgroupProcesses freezes then iterates over all the processes inside the // manager's cgroups sending a SIGKILL to each process then waiting for them to // exit. 
@@ -270,61 +276,3 @@ func killCgroupProcesses(m cgroups.Manager) error { } return nil } - -func finalizeSeccomp(config *initConfig) error { - if config.Config.Seccomp == nil { - return nil - } - context := seccomp.New() - for _, s := range config.Config.Seccomp.Syscalls { - ss := &seccomp.Syscall{ - Value: uint32(s.Value), - Action: seccompAction(s.Action), - } - if len(s.Args) > 0 { - ss.Args = seccompArgs(s.Args) - } - context.Add(ss) - } - return context.Load() -} - -func seccompAction(a configs.Action) seccomp.Action { - switch a { - case configs.Kill: - return seccomp.Kill - case configs.Trap: - return seccomp.Trap - case configs.Allow: - return seccomp.Allow - } - return seccomp.Error(syscall.Errno(int(a))) -} - -func seccompArgs(args []*configs.Arg) seccomp.Args { - var sa []seccomp.Arg - for _, a := range args { - sa = append(sa, seccomp.Arg{ - Index: uint32(a.Index), - Op: seccompOperator(a.Op), - Value: uint(a.Value), - }) - } - return seccomp.Args{sa} -} - -func seccompOperator(o configs.Operator) seccomp.Operator { - switch o { - case configs.EqualTo: - return seccomp.EqualTo - case configs.NotEqualTo: - return seccomp.NotEqualTo - case configs.GreatherThan: - return seccomp.GreatherThan - case configs.LessThan: - return seccomp.LessThan - case configs.MaskEqualTo: - return seccomp.MaskEqualTo - } - return 0 -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/label/label.go b/vendor/src/github.com/opencontainers/runc/libcontainer/label/label.go index 5a540fd5a0..3df30ef075 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/label/label.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/label/label.go @@ -29,7 +29,7 @@ func SetFileCreateLabel(fileLabel string) error { return nil } -func Relabel(path string, fileLabel string, relabel string) error { +func Relabel(path string, fileLabel string, shared bool) error { return nil } @@ -59,3 +59,13 @@ func DupSecOpt(src string) []string { func DisableSecOpt() 
[]string { return nil } + +// Validate checks that the label does not include unexpected options +func Validate(label string) error { + return nil +} + +// IsShared checks that the label includes a "shared" mark +func IsShared(label string) bool { + return false +} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/label/label_selinux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/label/label_selinux.go index 886861a3b9..e21b2fbbb2 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/label/label_selinux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/label/label_selinux.go @@ -9,6 +9,8 @@ import ( "github.com/opencontainers/runc/libcontainer/selinux" ) +var ErrIncompatibleLabel = fmt.Errorf("Bad SELinux option z and Z can not be used together") + // InitLabels returns the process label and file labels to be used within // the container. A list of options can be passed into this function to alter // the labels. The labels returned will include a random MCS String, that is @@ -95,28 +97,24 @@ func SetFileCreateLabel(fileLabel string) error { return nil } -// Change the label of path to the filelabel string. If the relabel string -// is "z", relabel will change the MCS label to s0. This will allow all -// containers to share the content. If the relabel string is a "Z" then -// the MCS label should continue to be used. SELinux will use this field -// to make sure the content can not be shared by other containes. -func Relabel(path string, fileLabel string, relabel string) error { - exclude_path := []string{"/", "/usr", "/etc"} +// Change the label of path to the filelabel string. +// It changes the MCS label to s0 if shared is true. +// This will allow all containers to share the content. 
+func Relabel(path string, fileLabel string, shared bool) error { + if !selinux.SelinuxEnabled() { + return nil + } + if fileLabel == "" { return nil } - if !strings.ContainsAny(relabel, "zZ") { - return nil + + exclude_paths := map[string]bool{"/": true, "/usr": true, "/etc": true} + if exclude_paths[path] { + return fmt.Errorf("Relabeling of %s is not allowed", path) } - for _, p := range exclude_path { - if path == p { - return fmt.Errorf("Relabeling of %s is not allowed", path) - } - } - if strings.Contains(relabel, "z") && strings.Contains(relabel, "Z") { - return fmt.Errorf("Bad SELinux option z and Z can not be used together") - } - if strings.Contains(relabel, "z") { + + if shared { c := selinux.NewContext(fileLabel) c["level"] = "s0" fileLabel = c.Get() @@ -161,3 +159,16 @@ func DupSecOpt(src string) []string { func DisableSecOpt() []string { return selinux.DisableSecOpt() } + +// Validate checks that the label does not include unexpected options +func Validate(label string) error { + if strings.Contains(label, "z") && strings.Contains(label, "Z") { + return ErrIncompatibleLabel + } + return nil +} + +// IsShared checks that the label includes a "shared" mark +func IsShared(label string) bool { + return strings.Contains(label, "z") +} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_armppc64.go b/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_armppc64.go index 04135f300b..965e0bfbc7 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_armppc64.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_armppc64.go @@ -1,4 +1,4 @@ -// +build arm ppc64 +// +build arm ppc64 ppc64le package netlink diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_notarm.go b/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_notarm.go index 62380d6b8e..7446279892 100644 --- 
a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_notarm.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_notarm.go @@ -1,4 +1,4 @@ -// +build !arm,!ppc64 +// +build !arm,!ppc64,!ppc64le package netlink diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/process_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/process_linux.go index ec640f27ae..f191c16ee2 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/process_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/process_linux.go @@ -13,6 +13,7 @@ import ( "syscall" "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/system" ) @@ -138,11 +139,9 @@ func (p *setnsProcess) terminate() error { func (p *setnsProcess) wait() (*os.ProcessState, error) { err := p.cmd.Wait() - if err != nil { - return p.cmd.ProcessState, err - } - return p.cmd.ProcessState, nil + // Return actual ProcessState even on Wait error + return p.cmd.ProcessState, err } func (p *setnsProcess) pid() int { @@ -175,9 +174,9 @@ func (p *initProcess) externalDescriptors() []string { return p.fds } -func (p *initProcess) start() error { +func (p *initProcess) start() (err error) { defer p.parentPipe.Close() - err := p.cmd.Start() + err = p.cmd.Start() p.childPipe.Close() if err != nil { return newSystemError(err) @@ -202,6 +201,18 @@ func (p *initProcess) start() error { p.manager.Destroy() } }() + if p.config.Config.Hooks != nil { + s := configs.HookState{ + ID: p.container.id, + Pid: p.pid(), + Root: p.config.Config.Rootfs, + } + for _, hook := range p.config.Config.Hooks.Prestart { + if err := hook.Run(s); err != nil { + return newSystemError(err) + } + } + } if err := p.createNetworkInterfaces(); err != nil { return newSystemError(err) } @@ -286,9 +297,7 @@ func (p *initProcess) setExternalDescriptors(newFds 
[]string) { } func getPipeFds(pid int) ([]string, error) { - var fds []string - - fds = make([]string, 3) + fds := make([]string, 3) dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd") for i := 0; i < 3; i++ { diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go index 88aa77d517..ecdc7ca64f 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go @@ -27,6 +27,8 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) { if err := prepareRoot(config); err != nil { return newSystemError(err) } + + setupDev := len(config.Devices) == 0 for _, m := range config.Mounts { for _, precmd := range m.PremountCmds { if err := mountCmd(precmd); err != nil { @@ -43,14 +45,16 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) { } } } - if err := createDevices(config); err != nil { - return newSystemError(err) - } - if err := setupPtmx(config, console); err != nil { - return newSystemError(err) - } - if err := setupDevSymlinks(config.Rootfs); err != nil { - return newSystemError(err) + if !setupDev { + if err := createDevices(config); err != nil { + return newSystemError(err) + } + if err := setupPtmx(config, console); err != nil { + return newSystemError(err) + } + if err := setupDevSymlinks(config.Rootfs); err != nil { + return newSystemError(err) + } } if err := syscall.Chdir(config.Rootfs); err != nil { return newSystemError(err) @@ -63,8 +67,10 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) { if err != nil { return newSystemError(err) } - if err := reOpenDevNull(config.Rootfs); err != nil { - return newSystemError(err) + if !setupDev { + if err := reOpenDevNull(config.Rootfs); err != nil { + return newSystemError(err) + } } if config.Readonlyfs { if err := setReadonly(); err != nil { 
@@ -131,6 +137,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { return err } return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data) + case "securityfs": + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data) case "bind": stat, err := os.Stat(m.Source) if err != nil { @@ -160,7 +171,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { } } if m.Relabel != "" { - if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil { + if err := label.Validate(m.Relabel); err != nil { + return err + } + shared := label.IsShared(m.Relabel) + if err := label.Relabel(m.Source, mountLabel, shared); err != nil { return err } } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/bpf.go b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/bpf.go deleted file mode 100644 index 65908ecc8a..0000000000 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/bpf.go +++ /dev/null @@ -1,34 +0,0 @@ -// +build linux - -package seccomp - -import "strings" - -type bpfLabel struct { - label string - location uint32 -} - -type bpfLabels []bpfLabel - -// labelIndex returns the index for the label if it exists in the slice. -// if it does not exist in the slice it appends the label lb to the end -// of the slice and returns the index. 
-func labelIndex(labels *bpfLabels, lb string) uint32 { - var id uint32 - for id = 0; id < uint32(len(*labels)); id++ { - if strings.EqualFold(lb, (*labels)[id].label) { - return id - } - } - *labels = append(*labels, bpfLabel{lb, 0xffffffff}) - return id -} - -func scmpBpfStmt(code uint16, k uint32) sockFilter { - return sockFilter{code, 0, 0, k} -} - -func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter { - return sockFilter{code, jt, jf, k} -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/config.go b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/config.go new file mode 100644 index 0000000000..5acf06685c --- /dev/null +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/config.go @@ -0,0 +1,53 @@ +package seccomp + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +// ConvertStringToOperator converts a string into a Seccomp comparison operator. +// Comparison operators use the names they are assigned by Libseccomp's header. +// Attempting to convert a string that is not a valid operator results in an +// error. +func ConvertStringToOperator(in string) (configs.Operator, error) { + switch in { + case "SCMP_CMP_NE": + return configs.NotEqualTo, nil + case "SCMP_CMP_LT": + return configs.LessThan, nil + case "SCMP_CMP_LE": + return configs.LessThanOrEqualTo, nil + case "SCMP_CMP_EQ": + return configs.EqualTo, nil + case "SCMP_CMP_GE": + return configs.GreaterThanOrEqualTo, nil + case "SCMP_CMP_GT": + return configs.GreaterThan, nil + case "SCMP_CMP_MASKED_EQ": + return configs.MaskEqualTo, nil + default: + return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in) + } +} + +// ConvertStringToAction converts a string into a Seccomp rule match action. +// Actions use the names they are assigned in Libseccomp's header, though some +// (notably, SCMP_ACT_TRACE) are not available in this implementation and will +// return errors. 
+// Attempting to convert a string that is not a valid action results in an +// error. +func ConvertStringToAction(in string) (configs.Action, error) { + switch in { + case "SCMP_ACT_KILL": + return configs.Kill, nil + case "SCMP_ACT_ERRNO": + return configs.Errno, nil + case "SCMP_ACT_TRAP": + return configs.Trap, nil + case "SCMP_ACT_ALLOW": + return configs.Allow, nil + default: + return 0, fmt.Errorf("string %s is not a valid action for seccomp", in) + } +} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/context.go b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/context.go deleted file mode 100644 index 6d0b7c3c3b..0000000000 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/context.go +++ /dev/null @@ -1,146 +0,0 @@ -// +build linux - -package seccomp - -import ( - "errors" - "syscall" -) - -const labelTemplate = "lb-%d-%d" - -// Action is the type of action that will be taken when a -// syscall is performed. -type Action int - -const ( - Kill Action = iota - 3 // Kill the calling process of the syscall. - Trap // Trap and coredump the calling process of the syscall. - Allow // Allow the syscall to be completed. -) - -// Syscall is the specified syscall, action, and any type of arguments -// to filter on. -type Syscall struct { - // Value is the syscall number. - Value uint32 - // Action is the action to perform when the specified syscall is made. - Action Action - // Args are filters that can be specified on the arguments to the syscall. - Args Args -} - -func (s *Syscall) scmpAction() uint32 { - switch s.Action { - case Allow: - return retAllow - case Trap: - return retTrap - case Kill: - return retKill - } - return actionErrno(uint32(s.Action)) -} - -// Arg represents an argument to the syscall with the argument's index, -// the operator to apply when matching, and the argument's value at that time. 
-type Arg struct { - Index uint32 // index of args which start from zero - Op Operator // operation, such as EQ/NE/GE/LE - Value uint // the value of arg -} - -type Args [][]Arg - -var ( - ErrUnresolvedLabel = errors.New("seccomp: unresolved label") - ErrDuplicateLabel = errors.New("seccomp: duplicate label use") - ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument") -) - -// Error returns an Action that will be used to send the calling -// process the specified errno when the syscall is made. -func Error(code syscall.Errno) Action { - return Action(code) -} - -// New returns a new syscall context for use. -func New() *Context { - return &Context{ - syscalls: make(map[uint32]*Syscall), - } -} - -// Context holds syscalls for the current process to limit the type of -// actions the calling process can make. -type Context struct { - syscalls map[uint32]*Syscall -} - -// Add will add the specified syscall, action, and arguments to the seccomp -// Context. -func (c *Context) Add(s *Syscall) { - c.syscalls[s.Value] = s -} - -// Remove removes the specified syscall configuration from the Context. -func (c *Context) Remove(call uint32) { - delete(c.syscalls, call) -} - -// Load will apply the Context to the calling process makeing any secccomp process changes -// apply after the context is loaded. 
-func (c *Context) Load() error { - filter, err := c.newFilter() - if err != nil { - return err - } - if err := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); err != nil { - return err - } - prog := newSockFprog(filter) - return prog.set() -} - -func (c *Context) newFilter() ([]sockFilter, error) { - var ( - labels bpfLabels - f = newFilter() - ) - for _, s := range c.syscalls { - f.addSyscall(s, &labels) - } - f.allow() - // process args for the syscalls - for _, s := range c.syscalls { - if err := f.addArguments(s, &labels); err != nil { - return nil, err - } - } - // apply labels for arguments - idx := int32(len(*f) - 1) - for ; idx >= 0; idx-- { - lf := &(*f)[idx] - if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) { - continue - } - rel := int32(lf.jt)<<8 | int32(lf.jf) - if ((jumpJT << 8) | jumpJF) == rel { - if labels[lf.k].location == 0xffffffff { - return nil, ErrUnresolvedLabel - } - lf.k = labels[lf.k].location - uint32(idx+1) - lf.jt = 0 - lf.jf = 0 - } else if ((labelJT << 8) | labelJF) == rel { - if labels[lf.k].location != 0xffffffff { - return nil, ErrDuplicateLabel - } - labels[lf.k].location = uint32(idx) - lf.k = 0 - lf.jt = 0 - lf.jf = 0 - } - } - return *f, nil -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/filter.go b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/filter.go deleted file mode 100644 index 658fbddd4a..0000000000 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/filter.go +++ /dev/null @@ -1,118 +0,0 @@ -// +build linux - -package seccomp - -import ( - "fmt" - "syscall" - "unsafe" -) - -type sockFilter struct { - code uint16 - jt uint8 - jf uint8 - k uint32 -} - -func newFilter() *filter { - var f filter - f = append(f, sockFilter{ - pfLD + syscall.BPF_W + syscall.BPF_ABS, - 0, - 0, - uint32(unsafe.Offsetof(secData.nr)), - }) - return &f -} - -type filter []sockFilter - -func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) { - if len(s.Args) == 0 { - 
f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())) - } else { - if len(s.Args[0]) > 0 { - lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[0][0].Index) - f.call(s.Value, - scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), - jumpJT, jumpJF)) - } - } -} - -func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error { - for i := 0; len(s.Args) > i; i++ { - if len(s.Args[i]) > 0 { - lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index) - f.label(labels, lb) - f.arg(s.Args[i][0].Index) - } - for j := 0; j < len(s.Args[i]); j++ { - var jf sockFilter - if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 { - lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index) - jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, - labelIndex(labels, lbj), jumpJT, jumpJF) - } else { - jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()) - } - if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil { - return err - } - } - f.allow() - } - return nil -} - -func (f *filter) label(labels *bpfLabels, lb string) { - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF)) -} - -func (f *filter) call(nr uint32, jt sockFilter) { - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1)) - *f = append(*f, jt) -} - -func (f *filter) allow() { - *f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow)) -} - -func (f *filter) deny() { - *f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap)) -} - -func (f *filter) arg(index uint32) { - arg(f, index) -} - -func (f *filter) op(operation Operator, v uint, jf sockFilter) error { - switch operation { - case EqualTo: - jumpEqualTo(f, v, jf) - case NotEqualTo: - jumpNotEqualTo(f, v, jf) - case GreatherThan: - jumpGreaterThan(f, v, jf) - case LessThan: - jumpLessThan(f, v, jf) - case MaskEqualTo: - jumpMaskEqualTo(f, v, jf) - default: - return ErrUnsupportedOperation - } - return 
nil -} - -func arg(f *filter, idx uint32) { - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx))) - *f = append(*f, scmpBpfStmt(syscall.BPF_ST, 0)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx))) - *f = append(*f, scmpBpfStmt(syscall.BPF_ST, 1)) -} - -func jump(f *filter, labels *bpfLabels, lb string) { - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), - jumpJT, jumpJF)) -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/jump_amd64.go b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/jump_amd64.go deleted file mode 100644 index f0d07716a4..0000000000 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/jump_amd64.go +++ /dev/null @@ -1,68 +0,0 @@ -// +build linux,amd64 - -package seccomp - -// Using BPF filters -// -// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf -import "syscall" - -func jumpGreaterThan(f *filter, v uint, jt sockFilter) { - lo := uint32(uint64(v) % 0x100000000) - hi := uint32(uint64(v) / 0x100000000) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 4, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, (lo), 0, 2)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) - *f = append(*f, jt) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) -} - -func jumpEqualTo(f *filter, v uint, jt sockFilter) { - lo := uint32(uint64(v) % 0x100000000) - hi := uint32(uint64(v) / 0x100000000) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (lo), 0, 2)) - *f = 
append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) - *f = append(*f, jt) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) -} - -func jumpLessThan(f *filter, v uint, jt sockFilter) { - lo := uint32(uint64(v) % 0x100000000) - hi := uint32(uint64(v) / 0x100000000) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 6, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 3)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (lo), 2, 0)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) - *f = append(*f, jt) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) -} - -func jumpNotEqualTo(f *filter, v uint, jt sockFilter) { - lo := uint32(uint64(v) % 0x100000000) - hi := uint32(uint64(v) / 0x100000000) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 5, 0)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) - *f = append(*f, jt) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) -} - -// this checks for a value inside a mask. 
The evalusation is equal to doing -// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER -func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) { - lo := uint32(uint64(v) % 0x100000000) - hi := uint32(uint64(v) / 0x100000000) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) - *f = append(*f, scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v))) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) - *f = append(*f, jt) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go new file mode 100644 index 0000000000..58bdbf6d63 --- /dev/null +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go @@ -0,0 +1,165 @@ +// +build linux,cgo,seccomp + +package seccomp + +import ( + "fmt" + "log" + "syscall" + + "github.com/opencontainers/runc/libcontainer/configs" + libseccomp "github.com/seccomp/libseccomp-golang" +) + +var ( + actAllow = libseccomp.ActAllow + actTrap = libseccomp.ActTrap + actKill = libseccomp.ActKill + actErrno = libseccomp.ActErrno.SetReturnCode(int16(syscall.EPERM)) +) + +// Filters given syscalls in a container, preventing them from being used +// Started in the container init process, and carried over to all child processes +// Setns calls, however, require a separate invocation, as they are not children +// of the init until they join the namespace +func InitSeccomp(config *configs.Seccomp) error { + if config == nil { + return fmt.Errorf("cannot initialize Seccomp - nil config passed") + } + + defaultAction, err := getAction(config.DefaultAction) + if err != nil { + return fmt.Errorf("error initializing seccomp - invalid default action") 
+ } + + filter, err := libseccomp.NewFilter(defaultAction) + if err != nil { + return fmt.Errorf("error creating filter: %s", err) + } + + // Unset no new privs bit + if err := filter.SetNoNewPrivsBit(false); err != nil { + return fmt.Errorf("error setting no new privileges: %s", err) + } + + // Add a rule for each syscall + for _, call := range config.Syscalls { + if call == nil { + return fmt.Errorf("encountered nil syscall while initializing Seccomp") + } + + if err = matchCall(filter, call); err != nil { + return err + } + } + + if err = filter.Load(); err != nil { + return fmt.Errorf("error loading seccomp filter into kernel: %s", err) + } + + return nil +} + +// Convert Libcontainer Action to Libseccomp ScmpAction +func getAction(act configs.Action) (libseccomp.ScmpAction, error) { + switch act { + case configs.Kill: + return actKill, nil + case configs.Errno: + return actErrno, nil + case configs.Trap: + return actTrap, nil + case configs.Allow: + return actAllow, nil + default: + return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule") + } +} + +// Convert Libcontainer Operator to Libseccomp ScmpCompareOp +func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) { + switch op { + case configs.EqualTo: + return libseccomp.CompareEqual, nil + case configs.NotEqualTo: + return libseccomp.CompareNotEqual, nil + case configs.GreaterThan: + return libseccomp.CompareGreater, nil + case configs.GreaterThanOrEqualTo: + return libseccomp.CompareGreaterEqual, nil + case configs.LessThan: + return libseccomp.CompareLess, nil + case configs.LessThanOrEqualTo: + return libseccomp.CompareLessOrEqual, nil + case configs.MaskEqualTo: + return libseccomp.CompareMaskedEqual, nil + default: + return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule") + } +} + +// Convert Libcontainer Arg to Libseccomp ScmpCondition +func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) { + cond := 
libseccomp.ScmpCondition{} + + if arg == nil { + return cond, fmt.Errorf("cannot convert nil to syscall condition") + } + + op, err := getOperator(arg.Op) + if err != nil { + return cond, err + } + + return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo) +} + +// Add a rule to match a single syscall +func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error { + if call == nil || filter == nil { + return fmt.Errorf("cannot use nil as syscall to block") + } + + if len(call.Name) == 0 { + return fmt.Errorf("empty string is not a valid syscall") + } + + // If we can't resolve the syscall, assume it's not supported on this kernel + // Ignore it, don't error out + callNum, err := libseccomp.GetSyscallFromName(call.Name) + if err != nil { + log.Printf("Error resolving syscall name %s: %s - ignoring syscall.", call.Name, err) + return nil + } + + // Convert the call's action to the libseccomp equivalent + callAct, err := getAction(call.Action) + if err != nil { + return err + } + + // Unconditional match - just add the rule + if len(call.Args) == 0 { + if err = filter.AddRule(callNum, callAct); err != nil { + return err + } + } else { + // Conditional match - convert the per-arg rules into library format + conditions := []libseccomp.ScmpCondition{} + + for _, cond := range call.Args { + newCond, err := getCondition(cond) + if err != nil { + return err + } + + conditions = append(conditions, newCond) + } + + if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil { + return err + } + } + + return nil +} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unix.go b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unix.go deleted file mode 100644 index a68a4dcc64..0000000000 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unix.go +++ /dev/null @@ -1,124 +0,0 @@ -// +build linux - -// Package seccomp provides native seccomp ( 
https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for go. -package seccomp - -import ( - "syscall" - "unsafe" -) - -// Operator that is used for argument comparison. -type Operator int - -const ( - EqualTo Operator = iota - NotEqualTo - GreatherThan - LessThan - MaskEqualTo -) - -const ( - jumpJT = 0xff - jumpJF = 0xff - labelJT = 0xfe - labelJF = 0xfe -) - -const ( - pfLD = 0x0 - retKill = 0x00000000 - retTrap = 0x00030000 - retAllow = 0x7fff0000 - modeFilter = 0x2 - prSetNoNewPrivileges = 0x26 -) - -func actionErrno(errno uint32) uint32 { - return 0x00050000 | (errno & 0x0000ffff) -} - -var ( - secData = struct { - nr int32 - arch uint32 - insPointer uint64 - args [6]uint64 - }{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}} -) - -var isLittle = func() bool { - var ( - x = 0x1234 - p = unsafe.Pointer(&x) - p2 = (*[unsafe.Sizeof(0)]byte)(p) - ) - if p2[0] == 0 { - return false - } - return true -}() - -var endian endianSupport - -type endianSupport struct { -} - -func (e endianSupport) hi(i uint32) uint32 { - if isLittle { - return e.little(i) - } - return e.big(i) -} - -func (e endianSupport) low(i uint32) uint32 { - if isLittle { - return e.big(i) - } - return e.little(i) -} - -func (endianSupport) big(idx uint32) uint32 { - if idx >= 6 { - return 0 - } - return uint32(unsafe.Offsetof(secData.args)) + 8*idx -} - -func (endianSupport) little(idx uint32) uint32 { - if idx < 0 || idx >= 6 { - return 0 - } - return uint32(unsafe.Offsetof(secData.args)) + - uint32(unsafe.Alignof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch)) -} - -func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error { - _, _, err := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0) - if err != 0 { - return err - } - return nil -} - -func newSockFprog(filter []sockFilter) *sockFprog { - return &sockFprog{ - len: uint16(len(filter)), - filt: filter, - } -} - -type sockFprog struct { - len uint16 - filt []sockFilter -} - -func (s 
*sockFprog) set() error { - _, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP), - uintptr(modeFilter), uintptr(unsafe.Pointer(s))) - if err != 0 { - return err - } - return nil -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go index 821dd57c0a..87d3abbc64 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go @@ -1,3 +1,19 @@ -// +build !linux +// +build !linux !cgo !seccomp package seccomp + +import ( + "errors" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported") + +// Seccomp not supported, do nothing +func InitSeccomp(config *configs.Seccomp) error { + if config != nil { + return ErrSeccompNotEnabled + } + return nil +} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/setns_init_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/setns_init_linux.go index 334d3e25cd..2bde44ffb4 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/setns_init_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/setns_init_linux.go @@ -7,6 +7,7 @@ import ( "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/label" + "github.com/opencontainers/runc/libcontainer/seccomp" "github.com/opencontainers/runc/libcontainer/system" ) @@ -20,6 +21,14 @@ func (l *linuxSetnsInit) Init() error { if err := setupRlimits(l.config.Config); err != nil { return err } + if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil { + return err + } + if l.config.Config.Seccomp != nil { + if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { + return err + } + } if err := 
finalizeNamespace(l.config); err != nil { return err } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/standard_init_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/standard_init_linux.go index b399aa5d1f..ec1005789c 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/standard_init_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/standard_init_linux.go @@ -9,6 +9,7 @@ import ( "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/label" + "github.com/opencontainers/runc/libcontainer/seccomp" "github.com/opencontainers/runc/libcontainer/system" ) @@ -46,6 +47,10 @@ func (l *linuxStandardInit) Init() error { if err := setupRlimits(l.config.Config); err != nil { return err } + if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil { + return err + } + label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace if l.config.Config.Namespaces.Contains(configs.NEWNS) { @@ -85,6 +90,11 @@ func (l *linuxStandardInit) Init() error { if err != nil { return err } + if l.config.Config.Seccomp != nil { + if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { + return err + } + } if err := finalizeNamespace(l.config); err != nil { return err } @@ -99,8 +109,5 @@ func (l *linuxStandardInit) Init() error { if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } - if err := finalizeSeccomp(l.config); err != nil { - return err - } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) }