2020-09-18 12:14:16 -04:00
|
|
|
package seccomp // import "github.com/docker/docker/profiles/seccomp"
|
2016-09-06 14:18:12 -04:00
|
|
|
|
2020-10-01 06:26:46 -04:00
|
|
|
import (
|
|
|
|
"encoding/json"
|
|
|
|
"fmt"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
|
|
|
)
|
2020-09-18 12:49:38 -04:00
|
|
|
|
2016-09-06 14:18:12 -04:00
|
|
|
// Seccomp represents the config for a seccomp profile for syscall restriction.
|
|
|
|
type Seccomp struct {
|
seccomp: Sync fields with runtime-spec fields
The runtime spec we are using has support for these 3 fields[1], but
moby doesn't have them in its seccomp struct. This patch just adds and
copies them when they are in the profile.
DefaultErrnoRet is implemented in the runc version moby is using (it is
implemented since runc-rc95[2]) but if we create a container without
this moby patch, we don't see an error nor the expected behavior. This
is not clear for the user (the profile they specify is valid, the syntax
is ok, but the wrong behavior is seen).
This is because the DefaultErrnoRet field is not copied to the config
passed ultimately to runc (i.e. is like the field was not specified).
With this patch, we see the expected behavior.
The other two fileds are in the runtime-spec but not yet in runc (a PR
is open and targets 1.1.0 milestone). However, I took the liberty to
copy them now too for two reasons:
1. If we don't add them now and end up using a runc version that
supports them, then the error that the user will see is not clear at
all:
docker: Error response from daemon: OCI runtime create failed: container_linux.go:380: starting container process caused: listenerPath is not set: unknown.
And it is not obvious to debug for the user, as the field _is_ set in
the profile they specify (just not copied by moby to the profile moby
specifies ultimately to runc).
2. When using a runc without seccomp notify support (like today), the
error we see is the same with and without this moby patch (when using a
seccomp profile with the new fields):
docker: Error response from daemon: OCI runtime create failed: string SCMP_ACT_NOTIFY is not a valid action for seccomp: unknown.
Then, it seems like a clear win to add them now: we don't have to do it
later (that implies not clear errors to the user if we forget, like we
did with DefaultErrnoRet) and the user sees the exact same error when
using a runc version that doesn't support these fields.
[1]: Note we are vendoring version 1c3f411f041711bbeecf35ff7e93461ea6789220 and this version has these 3 fields https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#seccomp
[2]: https://github.com/opencontainers/runc/pull/2954/
[3]: https://github.com/opencontainers/runc/pull/2682
Signed-off-by: Rodrigo Campos <rodrigo@kinvolk.io>
2021-06-25 09:51:06 -04:00
|
|
|
DefaultAction specs.LinuxSeccompAction `json:"defaultAction"`
|
|
|
|
DefaultErrnoRet *uint `json:"defaultErrnoRet,omitempty"`
|
|
|
|
ListenerPath string `json:"listenerPath,omitempty"`
|
|
|
|
ListenerMetadata string `json:"listenerMetadata,omitempty"`
|
|
|
|
|
2016-09-06 14:18:12 -04:00
|
|
|
// Architectures is kept to maintain backward compatibility with the old
|
|
|
|
// seccomp profile.
|
2021-07-16 09:33:33 -04:00
|
|
|
Architectures []specs.Arch `json:"architectures,omitempty"`
|
|
|
|
|
|
|
|
// ArchMap contains a list of Architectures and Sub-architectures for the
|
|
|
|
// profile. When generating the profile, this list is expanded to a
|
|
|
|
// []specs.Arch, to propagate the Architectures field of the profile.
|
|
|
|
ArchMap []Architecture `json:"archMap,omitempty"`
|
|
|
|
|
|
|
|
// Syscalls contains lists of syscall rules. Rules can define conditions
|
|
|
|
// for them to be included or excluded in the resulting profile (based on
|
|
|
|
// on kernel version, architecture, capabilities, etc.). These lists are
|
|
|
|
// expanded to an specs.Syscall When generating the profile, these lists
|
|
|
|
// are expanded to a []specs.LinuxSyscall.
|
|
|
|
Syscalls []*Syscall `json:"syscalls"`
|
2016-09-06 14:18:12 -04:00
|
|
|
}
|
|
|
|
|
2016-12-19 01:45:48 -05:00
|
|
|
// Architecture is used to represent a specific architecture
|
2016-09-06 14:18:12 -04:00
|
|
|
// and its sub-architectures
|
|
|
|
type Architecture struct {
|
2020-09-18 12:49:38 -04:00
|
|
|
Arch specs.Arch `json:"architecture"`
|
|
|
|
SubArches []specs.Arch `json:"subArchitectures"`
|
2016-09-06 14:18:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Filter is used to conditionally apply Seccomp rules
|
|
|
|
type Filter struct {
|
seccomp: remove dependency on pkg/parsers/kernel
This removes the dependency on the `pkg/parsers/kernel` package, because secomp
only needs to consider Linux (and no parsing is needed for Windows or Darwin kernel
versions).
This patch implements the minimum requirements for this implementation:
- only `kernel` and `major` versions are considered
- `minor` version, `flavor`, and `-rcXX` suffixes are ignored
So, for example:
- `3.4.54.longterm-1` => `kernel: 3`, `major: 4`
- `3.8.0-19-generic` => `kernel: 3`, `major: 8`
- `3.10.0-862.2.3.el7.x86_64` => `kernel: 3`, `major: 10`
Some systems also omit the `minor` and/or have odd-formatted versions. In context
of generating seccomp profiles, both versions below are considered equal;
- `3.12.25-gentoo` => `kernel: 3`, `major: 12`
- `3.12-1-amd64` => `kernel: 3`, `major: 12`
Note that `-rcX` suffixes are also not considered, and thus (e.g.) kernel `5.9-rc1`,
`5.9-rc6` and `5.9` are all considered equal.
The motivation for ignoring "minor" versions and "flavors" is that;
- The upstream kernel only does "kernel.major" releases
- While release-candidates exists for kernel (e.g. 5.9-rc5), we don't expect users
to write profiles that target a specific release-candidate, and therefore consider
(e.g.) kernel `5.9-rc1`, `5.9-rc6` and `5.9` to be equal.
- Generally, a seccomp-profile should either be portable, or written for a specific
infrastructure (in which case the writer of the profile would know if the kernel-flavors
used does/does not support certain things.
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2020-09-25 09:06:25 -04:00
|
|
|
Caps []string `json:"caps,omitempty"`
|
|
|
|
Arches []string `json:"arches,omitempty"`
|
|
|
|
|
|
|
|
// MinKernel describes the minimum kernel version the rule must be applied
|
|
|
|
// on, in the format "<kernel version>.<major revision>" (e.g. "3.12").
|
|
|
|
//
|
|
|
|
// When matching the kernel version of the host, minor revisions, and distro-
|
|
|
|
// specific suffixes are ignored, which means that "3.12.25-gentoo", "3.12-1-amd64",
|
|
|
|
// "3.12", and "3.12-rc5" are considered equal (kernel 3, major revision 12).
|
2020-10-01 06:26:46 -04:00
|
|
|
MinKernel *KernelVersion `json:"minKernel,omitempty"`
|
2016-09-06 14:18:12 -04:00
|
|
|
}
|
|
|
|
|
2021-02-09 08:30:14 -05:00
|
|
|
// Syscall is used to match a group of syscalls in Seccomp. It extends the
|
|
|
|
// runtime-spec Syscall type, adding a "Name" field for backward compatibility
|
|
|
|
// with older JSON representations, additional "Comment" metadata, and conditional
|
|
|
|
// rules ("Includes", "Excludes") used to generate a runtime-spec Seccomp profile
|
|
|
|
// based on the container (capabilities) and host's (arch, kernel) configuration.
|
2016-09-06 14:18:12 -04:00
|
|
|
type Syscall struct {
|
2021-02-09 08:30:14 -05:00
|
|
|
specs.LinuxSyscall
|
|
|
|
// Deprecated: kept for backward compatibility with old JSON profiles, use Names instead
|
2021-02-09 08:46:50 -05:00
|
|
|
Name string `json:"name,omitempty"`
|
|
|
|
Comment string `json:"comment,omitempty"`
|
|
|
|
Includes *Filter `json:"includes,omitempty"`
|
|
|
|
Excludes *Filter `json:"excludes,omitempty"`
|
2016-09-06 14:18:12 -04:00
|
|
|
}
|
2020-10-01 06:26:46 -04:00
|
|
|
|
|
|
|
// KernelVersion holds information about the kernel.
type KernelVersion struct {
	Kernel uint64 // Version of the Kernel (i.e., the "4" in "4.1.2-generic")
	Major  uint64 // Major revision of the Kernel (i.e., the "1" in "4.1.2-generic")
}

// String implements fmt.Stringer for KernelVersion. It returns the version
// formatted as "<kernel>.<major>", or an empty string when both components
// are zero.
func (k *KernelVersion) String() string {
	if k.Kernel > 0 || k.Major > 0 {
		return fmt.Sprintf("%d.%d", k.Kernel, k.Major)
	}
	return ""
}

// MarshalJSON implements json.Marshaler for KernelVersion. The version is
// encoded as the JSON string produced by String.
func (k *KernelVersion) MarshalJSON() ([]byte, error) {
	return json.Marshal(k.String())
}

// UnmarshalJSON implements json.Unmarshaler for KernelVersion.
//
// The input must be a JSON string of the form "<kernel>.<major>". An empty
// string is accepted and leaves k untouched. Any other shape, a non-numeric
// or out-of-range component, or the version "0.0" yields an error. k is only
// modified when decoding succeeds, so a failed decode never leaves it
// partially populated.
func (k *KernelVersion) UnmarshalJSON(version []byte) error {
	// make sure we have a string
	var ver string
	if err := json.Unmarshal(version, &ver); err != nil {
		return fmt.Errorf(`invalid kernel version: %s, expected "<kernel>.<major>": %w`, string(version), err)
	}
	if ver == "" {
		return nil
	}

	// Splitting into at most 3 parts lets "a.b.c" be told apart from "a.b".
	parts := strings.SplitN(ver, ".", 3)
	if len(parts) != 2 {
		return fmt.Errorf(`invalid kernel version: %s, expected "<kernel>.<major>"`, string(version))
	}

	// Parse into locals first; a bitSize of 8 limits each component to 0-255.
	kernel, err := strconv.ParseUint(parts[0], 10, 8)
	if err != nil {
		return fmt.Errorf(`invalid kernel version: %s, expected "<kernel>.<major>": %w`, string(version), err)
	}
	major, err := strconv.ParseUint(parts[1], 10, 8)
	if err != nil {
		return fmt.Errorf(`invalid kernel version: %s, expected "<kernel>.<major>": %w`, string(version), err)
	}
	if kernel == 0 && major == 0 {
		return fmt.Errorf(`invalid kernel version: %s, expected "<kernel>.<major>": version cannot be 0.0`, string(version))
	}

	k.Kernel, k.Major = kernel, major
	return nil
}
|