1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00
moby--moby/profiles/seccomp/seccomp_linux.go
Rodrigo Campos 5d244675bd seccomp: Sync fields with runtime-spec fields
The runtime spec we are using has support for these 3 fields[1], but
moby doesn't have them in its seccomp struct. This patch just adds and
copies them when they are in the profile.

DefaultErrnoRet is implemented in the runc version moby is using (it is
implemented since runc-rc95[2]) but if we create a container without
this moby patch, we don't see an error nor the expected behavior. This
is not clear for the user (the profile they specify is valid, the syntax
is ok, but the wrong behavior is seen).

This is because the DefaultErrnoRet field is not copied to the config
passed ultimately to runc (i.e. it is as if the field was not specified).
With this patch, we see the expected behavior.

The other two fields are in the runtime-spec but not yet in runc (a PR[3]
is open and targets 1.1.0 milestone). However, I took the liberty to
copy them now too for two reasons:

1. If we don't add them now and end up using a runc version that
supports them, then the error that the user will see is not clear at
all:

	docker: Error response from daemon: OCI runtime create failed: container_linux.go:380: starting container process caused: listenerPath is not set: unknown.

And it is not obvious to debug for the user, as the field _is_ set in
the profile they specify (just not copied by moby to the profile moby
specifies ultimately to runc).

2. When using a runc without seccomp notify support (like today), the
error we see is the same with and without this moby patch (when using a
seccomp profile with the new fields):

	docker: Error response from daemon: OCI runtime create failed: string SCMP_ACT_NOTIFY is not a valid action for seccomp: unknown.

Then, it seems like a clear win to add them now: we don't have to do it
later (that implies not clear errors to the user if we forget, like we
did with DefaultErrnoRet) and the user sees the exact same error when
using a runc version that doesn't support these fields.

[1]: Note we are vendoring version 1c3f411f041711bbeecf35ff7e93461ea6789220 and this version has these 3 fields: https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#seccomp
[2]: https://github.com/opencontainers/runc/pull/2954/
[3]: https://github.com/opencontainers/runc/pull/2682

Signed-off-by: Rodrigo Campos <rodrigo@kinvolk.io>
2021-07-08 17:11:53 +02:00

171 lines
4.6 KiB
Go

//go:generate go run -tags 'seccomp' generate.go
package seccomp // import "github.com/docker/docker/profiles/seccomp"
import (
"encoding/json"
"errors"
"fmt"
"runtime"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// GetDefaultProfile converts the package's default seccomp profile into the
// runtime-spec representation, applying the filtering that setupSeccomp
// performs for the given runtime spec rs.
func GetDefaultProfile(rs *specs.Spec) (*specs.LinuxSeccomp, error) {
	defaults := DefaultProfile()
	return setupSeccomp(defaults, rs)
}
// LoadProfile decodes body as a JSON seccomp profile and converts it into the
// runtime-spec representation for the given runtime spec rs.
//
// If body is not valid JSON for a Seccomp profile, the returned error wraps
// the underlying encoding/json error so callers can inspect it with
// errors.As / errors.Is. Any error from setupSeccomp is returned unchanged.
func LoadProfile(body string, rs *specs.Spec) (*specs.LinuxSeccomp, error) {
	var config Seccomp
	if err := json.Unmarshal([]byte(body), &config); err != nil {
		// %w renders the same text as %v but keeps the cause in the error chain.
		return nil, fmt.Errorf("Decoding seccomp profile failed: %w", err)
	}
	return setupSeccomp(&config, rs)
}
// nativeToSeccomp maps a native architecture name (a value of goToNative) to
// the corresponding runtime-spec seccomp architecture constant.
//
// Every value produced by goToNative must appear as a key here; otherwise the
// architecture lookup in setupSeccomp fails and the profile's archMap is
// silently ignored on that platform.
var nativeToSeccomp = map[string]specs.Arch{
	"x86":         specs.ArchX86,
	"amd64":       specs.ArchX86_64,
	"arm":         specs.ArchARM,
	"arm64":       specs.ArchAARCH64,
	"mips64":      specs.ArchMIPS64,
	"mips64n32":   specs.ArchMIPS64N32,
	"mipsel64":    specs.ArchMIPSEL64,
	"mipsel64n32": specs.ArchMIPSEL64N32, // was misspelled "mips3l64n32"
	"mipsel":      specs.ArchMIPSEL,      // was "mipsle", which goToNative never produced
	"ppc":         specs.ArchPPC,
	"ppc64":       specs.ArchPPC64,
	"ppc64le":     specs.ArchPPC64LE,
	"s390":        specs.ArchS390,
	"s390x":       specs.ArchS390X,
}

// goToNative maps a GOARCH value to the native architecture name used as the
// key of nativeToSeccomp. The same name is what setupSeccomp compares against
// the "arches" lists of a profile's includes/excludes filters.
var goToNative = map[string]string{
	"386":         "x86",
	"amd64":       "amd64",
	"arm":         "arm",
	"arm64":       "arm64",
	"mips64":      "mips64",
	"mips64p32":   "mips64n32",
	"mips64le":    "mipsel64",
	"mips64p32le": "mipsel64n32", // was misspelled "mips3l64n32"
	"mipsle":      "mipsel",
	"ppc":         "ppc",
	"ppc64":       "ppc64",
	"ppc64le":     "ppc64le",
	"s390":        "s390",
	"s390x":       "s390x",
}
// inSlice reports whether s is an element of slice.
// Elements are matched by exact, case-sensitive string equality; a nil or
// empty slice never contains anything.
func inSlice(slice []string, s string) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] == s {
			return true
		}
	}
	return false
}
// setupSeccomp converts a seccomp profile into the runtime-spec LinuxSeccomp
// structure, dropping every syscall rule whose includes/excludes filters do
// not match the current architecture, the bounding capabilities found in rs,
// or the minimum-kernel check performed by kernelGreaterEqualThan.
//
// It returns (nil, nil) when config is nil, or when config has neither a
// default action nor any syscall rule (seccomp is then assumed disabled).
//
// An error is returned when the profile specifies both 'architectures' and
// 'archMap', when a rule specifies both 'name' and 'names', or when
// kernelGreaterEqualThan fails.
//
// A nil rs, or an rs without process capabilities, is treated as having an
// empty bounding capability set instead of causing a nil-pointer panic.
func setupSeccomp(config *Seccomp, rs *specs.Spec) (*specs.LinuxSeccomp, error) {
	if config == nil {
		return nil, nil
	}

	// No default action specified, no syscalls listed, assume seccomp disabled
	if config.DefaultAction == "" && len(config.Syscalls) == 0 {
		return nil, nil
	}

	if len(config.Architectures) != 0 && len(config.ArchMap) != 0 {
		return nil, errors.New("'architectures' and 'archMap' were specified in the seccomp profile, use either 'architectures' or 'archMap'")
	}

	newConfig := &specs.LinuxSeccomp{}

	// When the profile lists no architectures the field stays empty and
	// libseccomp will figure out the architecture to use.
	if len(config.Architectures) != 0 {
		newConfig.Architectures = config.Architectures
	}

	// Translate GOARCH into the native name, then into the seccomp constant
	// used to pick the matching archMap entry.
	arch := goToNative[runtime.GOARCH]
	seccompArch, archExists := nativeToSeccomp[arch]
	if len(config.ArchMap) != 0 && archExists {
		for _, a := range config.ArchMap {
			if a.Arch == seccompArch {
				newConfig.Architectures = append(newConfig.Architectures, a.Arch)
				newConfig.Architectures = append(newConfig.Architectures, a.SubArches...)
				break
			}
		}
	}

	newConfig.DefaultAction = config.DefaultAction
	newConfig.DefaultErrnoRet = config.DefaultErrnoRet
	newConfig.ListenerPath = config.ListenerPath
	newConfig.ListenerMetadata = config.ListenerMetadata

	// Resolve the bounding set once, guarding every pointer on the way so a
	// spec without process/capabilities behaves like an empty bounding set.
	var bounding []string
	if rs != nil && rs.Process != nil && rs.Process.Capabilities != nil {
		bounding = rs.Process.Capabilities.Bounding
	}

Loop:
	// Loop through all syscall blocks and convert them to libcontainer format after filtering them
	for _, call := range config.Syscalls {
		// Excludes: the rule is skipped as soon as ANY exclude condition matches.
		if call.Excludes != nil {
			if len(call.Excludes.Arches) > 0 {
				if inSlice(call.Excludes.Arches, arch) {
					continue Loop
				}
			}
			if len(call.Excludes.Caps) > 0 {
				for _, c := range call.Excludes.Caps {
					if inSlice(bounding, c) {
						continue Loop
					}
				}
			}
			if call.Excludes.MinKernel != nil {
				ok, err := kernelGreaterEqualThan(*call.Excludes.MinKernel)
				if err != nil {
					return nil, err
				}
				if ok {
					continue Loop
				}
			}
		}

		// Includes: the rule is kept only if ALL include conditions match.
		if call.Includes != nil {
			if len(call.Includes.Arches) > 0 {
				if !inSlice(call.Includes.Arches, arch) {
					continue Loop
				}
			}
			if len(call.Includes.Caps) > 0 {
				for _, c := range call.Includes.Caps {
					if !inSlice(bounding, c) {
						continue Loop
					}
				}
			}
			if call.Includes.MinKernel != nil {
				ok, err := kernelGreaterEqualThan(*call.Includes.MinKernel)
				if err != nil {
					return nil, err
				}
				if !ok {
					continue Loop
				}
			}
		}

		// A rule may use the single 'name' field or the 'names' list, not both;
		// a lone 'name' is folded into 'names' before the rule is emitted.
		if call.Name != "" {
			if len(call.Names) != 0 {
				return nil, errors.New("'name' and 'names' were specified in the seccomp profile, use either 'name' or 'names'")
			}
			call.Names = append(call.Names, call.Name)
		}

		newConfig.Syscalls = append(newConfig.Syscalls, call.LinuxSyscall)
	}

	return newConfig, nil
}