mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
set default seccomp profile
Signed-off-by: Jessica Frazelle <acidburn@docker.com>
This commit is contained in:
parent
ad56c972b4
commit
947293a280
5 changed files with 374 additions and 3 deletions
|
@ -69,6 +69,10 @@ func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks)
|
|||
if err := d.setCapabilities(container, c); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if c.SeccompProfile == "" {
|
||||
container.Seccomp = getDefaultSeccompProfile()
|
||||
}
|
||||
}
|
||||
// add CAP_ prefix to all caps for new libcontainer update to match
|
||||
// the spec format.
|
||||
|
@ -89,6 +93,7 @@ func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks)
|
|||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if err := execdriver.SetupCgroups(container, c); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -12,6 +12,10 @@ import (
|
|||
"github.com/opencontainers/specs"
|
||||
)
|
||||
|
||||
func getDefaultSeccompProfile() *configs.Seccomp {
|
||||
return defaultSeccompProfile
|
||||
}
|
||||
|
||||
func loadSeccompProfile(path string) (*configs.Seccomp, error) {
|
||||
f, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
|
|
319
daemon/execdriver/native/seccomp_default.go
Normal file
319
daemon/execdriver/native/seccomp_default.go
Normal file
|
@ -0,0 +1,319 @@
|
|||
// +build linux
|
||||
|
||||
package native
|
||||
|
||||
import "github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
||||
var defaultSeccompProfile = &configs.Seccomp{
|
||||
DefaultAction: configs.Allow,
|
||||
Syscalls: []*configs.Syscall{
|
||||
{
|
||||
// Quota and Accounting syscalls which could let containers
|
||||
// disable their own resource limits or process accounting
|
||||
Name: "acct",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Prevent containers from using the kernel keyring,
|
||||
// which is not namespaced
|
||||
Name: "add_key",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Similar to clock_settime and settimeofday
|
||||
// Time/Date is not namespaced
|
||||
Name: "adjtimex",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Time/Date is not namespaced
|
||||
Name: "clock_settime",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny cloning new namespaces
|
||||
Name: "clone",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{
|
||||
{
|
||||
// flags from sched.h
|
||||
// CLONE_NEWUTS 0x04000000
|
||||
// CLONE_NEWIPC 0x08000000
|
||||
// CLONE_NEWUSER 0x10000000
|
||||
// CLONE_NEWPID 0x20000000
|
||||
// CLONE_NEWNET 0x40000000
|
||||
Index: 0,
|
||||
Value: uint64(0x04000000),
|
||||
Op: configs.GreaterThanOrEqualTo,
|
||||
},
|
||||
{
|
||||
// flags from sched.h
|
||||
// CLONE_NEWNS 0x00020000
|
||||
Index: 0,
|
||||
Value: uint64(0x00020000),
|
||||
Op: configs.EqualTo,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
// Deny manipulation and functions on kernel modules.
|
||||
Name: "create_module",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny manipulation and functions on kernel modules.
|
||||
Name: "delete_module",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny retrieval of exported kernel and module symbols
|
||||
Name: "get_kernel_syms",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Terrifying syscalls that modify kernel memory and NUMA settings.
|
||||
// They're gated by CAP_SYS_NICE,
|
||||
// which we do not retain by default in containers.
|
||||
Name: "get_mempolicy",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny getting the list of robust futexes
|
||||
Name: "get_robust_list",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny manipulation and functions on kernel modules.
|
||||
Name: "init_module",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Prevent containers from modifying kernel I/O privilege levels.
|
||||
// Already restricted as containers drop CAP_SYS_RAWIO by default.
|
||||
Name: "ioperm",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Prevent containers from modifying kernel I/O privilege levels.
|
||||
// Already restricted as containers drop CAP_SYS_RAWIO by default.
|
||||
Name: "iopl",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Sister syscall of kexec_load that does the same thing,
|
||||
// slightly different arguments
|
||||
Name: "kexec_file_load",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny loading a new kernel for later execution
|
||||
Name: "kexec_load",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Prevent containers from using the kernel keyring,
|
||||
// which is not namespaced
|
||||
Name: "keyctl",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Tracing/profiling syscalls,
|
||||
// which could leak a lot of information on the host
|
||||
Name: "lookup_dcookie",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Terrifying syscalls that modify kernel memory and NUMA settings.
|
||||
// They're gated by CAP_SYS_NICE,
|
||||
// which we do not retain by default in containers.
|
||||
Name: "mbind",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Terrifying syscalls that modify kernel memory and NUMA settings.
|
||||
// They're gated by CAP_SYS_NICE,
|
||||
// which we do not retain by default in containers.
|
||||
Name: "migrate_pages",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Old syscall only used in 16-bit code,
|
||||
// and a potential information leak
|
||||
Name: "modify_ldt",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny mount
|
||||
Name: "mount",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Terrifying syscalls that modify kernel memory and NUMA settings.
|
||||
// They're gated by CAP_SYS_NICE,
|
||||
// which we do not retain by default in containers.
|
||||
Name: "move_pages",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny interaction with the kernel nfs daemon
|
||||
Name: "nfsservctl",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Cause of an old container breakout,
|
||||
// might as well restrict it to be on the safe side
|
||||
Name: "open_by_handle_at",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Tracing/profiling syscalls,
|
||||
// which could leak a lot of information on the host
|
||||
Name: "perf_event_open",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Prevent container from enabling BSD emulation.
|
||||
// Not inherently dangerous, but poorly tested,
|
||||
// potential for a lot of kernel vulns in this.
|
||||
Name: "personality",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny pivot_root
|
||||
Name: "pivot_root",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Already blocked by dropping CAP_SYS_PTRACE
|
||||
Name: "ptrace",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny manipulation and functions on kernel modules.
|
||||
Name: "query_module",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Quota and Accounting syscalls which could let containers
|
||||
// disable their own resource limits or process accounting
|
||||
Name: "quotactl",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Probably a bad idea to let containers reboot the host
|
||||
Name: "reboot",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Probably a bad idea to let containers restart
|
||||
Name: "restart_syscall",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Prevent containers from using the kernel keyring,
|
||||
// which is not namespaced
|
||||
Name: "request_key",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// meta, deny seccomp
|
||||
Name: "seccomp",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Terrifying syscalls that modify kernel memory and NUMA settings.
|
||||
// They're gated by CAP_SYS_NICE,
|
||||
// which we do not retain by default in containers.
|
||||
Name: "set_mempolicy",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// deny associating a thread with a namespace
|
||||
Name: "setns",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny setting the list of robust futexes
|
||||
Name: "set_robust_list",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Time/Date is not namespaced
|
||||
Name: "settimeofday",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny start/stop swapping to file/device
|
||||
Name: "swapon",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny start/stop swapping to file/device
|
||||
Name: "swapoff",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny read/write system parameters
|
||||
Name: "_sysctl",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Deny umount
|
||||
Name: "umount2",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Same as clone
|
||||
Name: "unshare",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
{
|
||||
// Older syscall related to shared libraries, unused for a long time
|
||||
Name: "uselib",
|
||||
Action: configs.Errno,
|
||||
Args: []*configs.Arg{},
|
||||
},
|
||||
},
|
||||
}
|
|
@ -2858,18 +2858,25 @@ func (s *DockerSuite) TestRunUnshareProc(c *check.C) {
|
|||
testRequires(c, Apparmor, DaemonIsLinux, NotUserNamespace)
|
||||
|
||||
name := "acidburn"
|
||||
if out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "--mount-proc=/proc", "mount"); err == nil || !strings.Contains(out, "Permission denied") {
|
||||
out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "--mount-proc=/proc", "mount")
|
||||
if err == nil ||
|
||||
!(strings.Contains(strings.ToLower(out), "permission denied") ||
|
||||
strings.Contains(strings.ToLower(out), "operation not permitted")) {
|
||||
c.Fatalf("unshare with --mount-proc should have failed with permission denied, got: %s, %v", out, err)
|
||||
}
|
||||
|
||||
name = "cereal"
|
||||
if out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc"); err == nil || !strings.Contains(out, "Permission denied") {
|
||||
out, _, err = dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
|
||||
if err == nil ||
|
||||
!(strings.Contains(strings.ToLower(out), "permission denied") ||
|
||||
strings.Contains(strings.ToLower(out), "operation not permitted")) {
|
||||
c.Fatalf("unshare and mount of /proc should have failed with permission denied, got: %s, %v", out, err)
|
||||
}
|
||||
|
||||
/* Ensure still fails if running privileged with the default policy */
|
||||
name = "crashoverride"
|
||||
if out, _, err := dockerCmdWithError("run", "--privileged", "--security-opt", "apparmor:docker-default", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc"); err == nil || !(strings.Contains(strings.ToLower(out), "permission denied") || strings.Contains(strings.ToLower(out), "operation not permitted")) {
|
||||
out, _, err = dockerCmdWithError("run", "--privileged", "--security-opt", "apparmor:docker-default", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
|
||||
if err == nil || !(strings.Contains(strings.ToLower(out), "permission denied") || strings.Contains(strings.ToLower(out), "operation not permitted")) {
|
||||
c.Fatalf("privileged unshare with apparmor should have failed with permission denied, got: %s, %v", out, err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -548,3 +548,39 @@ func (s *DockerSuite) TestRunSeccompProfileDenyChmod(c *check.C) {
|
|||
c.Fatalf("expected chmod with seccomp profile denied to fail, got %s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunSeccompProfileDenyUserns checks that 'docker run jess/unshare unshare --map-root-user --user sh -c whoami' exits with operation not permitted.
|
||||
func (s *DockerSuite) TestRunSeccompProfileDenyUserns(c *check.C) {
|
||||
testRequires(c, SameHostDaemon, seccompEnabled)
|
||||
// from sched.h
|
||||
jsonData := fmt.Sprintf(`{
|
||||
"defaultAction": "SCMP_ACT_ALLOW",
|
||||
"syscalls": [
|
||||
{
|
||||
"name": "unshare",
|
||||
"action": "SCMP_ACT_ERRNO",
|
||||
"args": [
|
||||
{
|
||||
"index": 0,
|
||||
"value": %d,
|
||||
"op": "SCMP_CMP_EQ"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`, uint64(0x10000000))
|
||||
tmpFile, err := ioutil.TempFile("", "profile.json")
|
||||
defer tmpFile.Close()
|
||||
if err != nil {
|
||||
c.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := tmpFile.Write([]byte(jsonData)); err != nil {
|
||||
c.Fatal(err)
|
||||
}
|
||||
runCmd := exec.Command(dockerBinary, "run", "--security-opt", "seccomp:"+tmpFile.Name(), "jess/unshare", "unshare", "--map-root-user", "--user", "sh", "-c", "whoami")
|
||||
out, _, _ := runCommandWithOutput(runCmd)
|
||||
if !strings.Contains(out, "Operation not permitted") {
|
||||
c.Fatalf("expected unshare userns with seccomp profile denied to fail, got %s", out)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue