From 947293a28084cb5ee2e10e4d128c6e2b9d9da89d Mon Sep 17 00:00:00 2001 From: Jessica Frazelle Date: Fri, 18 Dec 2015 10:01:58 -0800 Subject: [PATCH] set default seccomp profile Signed-off-by: Jessica Frazelle --- daemon/execdriver/native/create.go | 5 + daemon/execdriver/native/seccomp.go | 4 + daemon/execdriver/native/seccomp_default.go | 319 ++++++++++++++++++++ integration-cli/docker_cli_run_test.go | 13 +- integration-cli/docker_cli_run_unix_test.go | 36 +++ 5 files changed, 374 insertions(+), 3 deletions(-) create mode 100644 daemon/execdriver/native/seccomp_default.go diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 0154801382..39aec5058f 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -69,6 +69,10 @@ func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) if err := d.setCapabilities(container, c); err != nil { return nil, err } + + if c.SeccompProfile == "" { + container.Seccomp = getDefaultSeccompProfile() + } } // add CAP_ prefix to all caps for new libcontainer update to match // the spec format. 
@@ -89,6 +93,7 @@ func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks)
 			return nil, err
 		}
 	}
+
 	if err := execdriver.SetupCgroups(container, c); err != nil {
 		return nil, err
 	}
diff --git a/daemon/execdriver/native/seccomp.go b/daemon/execdriver/native/seccomp.go
index b6c5ae0d7d..c34651a1ae 100644
--- a/daemon/execdriver/native/seccomp.go
+++ b/daemon/execdriver/native/seccomp.go
@@ -12,6 +12,10 @@ import (
 	"github.com/opencontainers/specs"
 )
 
+func getDefaultSeccompProfile() *configs.Seccomp {
+	return defaultSeccompProfile
+}
+
 func loadSeccompProfile(path string) (*configs.Seccomp, error) {
 	f, err := ioutil.ReadFile(path)
 	if err != nil {
diff --git a/daemon/execdriver/native/seccomp_default.go b/daemon/execdriver/native/seccomp_default.go
new file mode 100644
index 0000000000..dbd00312a3
--- /dev/null
+++ b/daemon/execdriver/native/seccomp_default.go
@@ -0,0 +1,319 @@
+// +build linux
+
+package native
+
+import "github.com/opencontainers/runc/libcontainer/configs"
+
+// defaultSeccompProfile is the seccomp filter returned by
+// getDefaultSeccompProfile: every syscall is allowed except the ones listed
+// below, which fail with an errno. restart_syscall must stay allowed: the
+// kernel issues it itself to resume a syscall interrupted by a stop signal.
+var defaultSeccompProfile = &configs.Seccomp{
+	DefaultAction: configs.Allow,
+	Syscalls: []*configs.Syscall{
+		{
+			// Quota and Accounting syscalls which could let containers
+			// disable their own resource limits or process accounting
+			Name:   "acct",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent containers from using the kernel keyring,
+			// which is not namespaced
+			Name:   "add_key",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Similar to clock_settime and settimeofday
+			// Time/Date is not namespaced
+			Name:   "adjtimex",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Time/Date is not namespaced
+			Name:   "clock_settime",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		// Deny cloning new namespaces: one rule per CLONE_NEW* flag
+		// (values from sched.h), because conditions inside a single
+		// rule are ANDed while separate rules are ORed.
+		denyCloneFlag(0x00020000), // CLONE_NEWNS
+		denyCloneFlag(0x04000000), // CLONE_NEWUTS
+		denyCloneFlag(0x08000000), // CLONE_NEWIPC
+		denyCloneFlag(0x10000000), // CLONE_NEWUSER
+		denyCloneFlag(0x20000000), // CLONE_NEWPID
+		denyCloneFlag(0x40000000), // CLONE_NEWNET
+		{
+			// Deny manipulation and functions on kernel modules.
+			Name:   "create_module",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny manipulation and functions on kernel modules.
+			Name:   "delete_module",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny retrieval of exported kernel and module symbols
+			Name:   "get_kernel_syms",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
+			// They're gated by CAP_SYS_NICE,
+			// which we do not retain by default in containers.
+			Name:   "get_mempolicy",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny getting the list of robust futexes
+			Name:   "get_robust_list",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny manipulation and functions on kernel modules.
+			Name:   "init_module",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent containers from modifying kernel I/O privilege levels.
+			// Already restricted as containers drop CAP_SYS_RAWIO by default.
+			Name:   "ioperm",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent containers from modifying kernel I/O privilege levels.
+			// Already restricted as containers drop CAP_SYS_RAWIO by default.
+			Name:   "iopl",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Sister syscall of kexec_load that does the same thing,
+			// slightly different arguments
+			Name:   "kexec_file_load",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny loading a new kernel for later execution
+			Name:   "kexec_load",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent containers from using the kernel keyring,
+			// which is not namespaced
+			Name:   "keyctl",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Tracing/profiling syscalls,
+			// which could leak a lot of information on the host
+			Name:   "lookup_dcookie",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
+			// They're gated by CAP_SYS_NICE,
+			// which we do not retain by default in containers.
+			Name:   "mbind",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
+			// They're gated by CAP_SYS_NICE,
+			// which we do not retain by default in containers.
+			Name:   "migrate_pages",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Old syscall only used in 16-bit code,
+			// and a potential information leak
+			Name:   "modify_ldt",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny mount
+			Name:   "mount",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
+			// They're gated by CAP_SYS_NICE,
+			// which we do not retain by default in containers.
+			Name:   "move_pages",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny interaction with the kernel nfs daemon
+			Name:   "nfsservctl",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Cause of an old container breakout,
+			// might as well restrict it to be on the safe side
+			Name:   "open_by_handle_at",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Tracing/profiling syscalls,
+			// which could leak a lot of information on the host
+			Name:   "perf_event_open",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent container from enabling BSD emulation.
+			// Not inherently dangerous, but poorly tested,
+			// potential for a lot of kernel vulns in this.
+			Name:   "personality",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny pivot_root
+			Name:   "pivot_root",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Already blocked by dropping CAP_SYS_PTRACE
+			Name:   "ptrace",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny manipulation and functions on kernel modules.
+			Name:   "query_module",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Quota and Accounting syscalls which could let containers
+			// disable their own resource limits or process accounting
+			Name:   "quotactl",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Probably a bad idea to let containers reboot the host
+			Name:   "reboot",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent containers from using the kernel keyring,
+			// which is not namespaced
+			Name:   "request_key",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// meta, deny seccomp
+			Name:   "seccomp",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
+			// They're gated by CAP_SYS_NICE,
+			// which we do not retain by default in containers.
+			Name:   "set_mempolicy",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// deny associating a thread with a namespace
+			Name:   "setns",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny setting the list of robust futexes
+			Name:   "set_robust_list",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Time/Date is not namespaced
+			Name:   "settimeofday",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny start/stop swapping to file/device
+			Name:   "swapon",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny start/stop swapping to file/device
+			Name:   "swapoff",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny read/write system parameters
+			Name:   "_sysctl",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny umount
+			Name:   "umount2",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Same as clone
+			Name:   "unshare",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Older syscall related to shared libraries, unused for a long time
+			Name:   "uselib",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+	},
+}
+
+// denyCloneFlag returns a rule that makes clone fail with an errno whenever
+// flag is set in argument 0 (where the flags are expected). MaskEqualTo
+// takes the mask in Value and the expected masked result in ValueTwo.
+func denyCloneFlag(flag uint64) *configs.Syscall {
+	return &configs.Syscall{
+		Name:   "clone",
+		Action: configs.Errno,
+		Args: []*configs.Arg{
+			{
+				Index:    0,
+				Value:    flag,
+				ValueTwo: flag,
+				Op:       configs.MaskEqualTo,
+			},
+		},
+	}
+}
diff --git a/integration-cli/docker_cli_run_test.go b/integration-cli/docker_cli_run_test.go
index e696078950..cfa04f4d4c 100644
--- a/integration-cli/docker_cli_run_test.go
+++ b/integration-cli/docker_cli_run_test.go
@@ -2858,18 +2858,25 @@ func (s *DockerSuite) TestRunUnshareProc(c *check.C) {
 	testRequires(c, Apparmor, DaemonIsLinux, NotUserNamespace)
 
 	name := "acidburn"
-	if out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "--mount-proc=/proc", "mount"); err == nil || !strings.Contains(out, "Permission denied") {
+	out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "--mount-proc=/proc", "mount")
+	if err == nil
|| + !(strings.Contains(strings.ToLower(out), "permission denied") || + strings.Contains(strings.ToLower(out), "operation not permitted")) { c.Fatalf("unshare with --mount-proc should have failed with permission denied, got: %s, %v", out, err) } name = "cereal" - if out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc"); err == nil || !strings.Contains(out, "Permission denied") { + out, _, err = dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc") + if err == nil || + !(strings.Contains(strings.ToLower(out), "permission denied") || + strings.Contains(strings.ToLower(out), "operation not permitted")) { c.Fatalf("unshare and mount of /proc should have failed with permission denied, got: %s, %v", out, err) } /* Ensure still fails if running privileged with the default policy */ name = "crashoverride" - if out, _, err := dockerCmdWithError("run", "--privileged", "--security-opt", "apparmor:docker-default", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc"); err == nil || !(strings.Contains(strings.ToLower(out), "permission denied") || strings.Contains(strings.ToLower(out), "operation not permitted")) { + out, _, err = dockerCmdWithError("run", "--privileged", "--security-opt", "apparmor:docker-default", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc") + if err == nil || !(strings.Contains(strings.ToLower(out), "permission denied") || strings.Contains(strings.ToLower(out), "operation not permitted")) { c.Fatalf("privileged unshare with apparmor should have failed with permission denied, got: %s, %v", out, err) } } diff --git a/integration-cli/docker_cli_run_unix_test.go b/integration-cli/docker_cli_run_unix_test.go index 16607bd8ce..1b44cfed64 100644 --- 
a/integration-cli/docker_cli_run_unix_test.go
+++ b/integration-cli/docker_cli_run_unix_test.go
@@ -548,3 +548,39 @@ func (s *DockerSuite) TestRunSeccompProfileDenyChmod(c *check.C) {
 		c.Fatalf("expected chmod with seccomp profile denied to fail, got %s", out)
 	}
 }
+
+// TestRunSeccompProfileDenyUserns checks that 'docker run jess/unshare unshare --map-root-user --user sh -c whoami' exits with operation not permitted.
+func (s *DockerSuite) TestRunSeccompProfileDenyUserns(c *check.C) {
+	testRequires(c, SameHostDaemon, seccompEnabled)
+	// 0x10000000 is CLONE_NEWUSER, from sched.h
+	jsonData := fmt.Sprintf(`{
+	"defaultAction": "SCMP_ACT_ALLOW",
+	"syscalls": [
+		{
+			"name": "unshare",
+			"action": "SCMP_ACT_ERRNO",
+			"args": [
+				{
+					"index": 0,
+					"value": %d,
+					"op": "SCMP_CMP_EQ"
+				}
+			]
+		}
+	]
+}`, uint64(0x10000000))
+	tmpFile, err := ioutil.TempFile("", "profile.json")
+	if err != nil {
+		c.Fatal(err)
+	}
+	defer tmpFile.Close() // deferred only after TempFile is known to have succeeded
+
+	if _, err := tmpFile.Write([]byte(jsonData)); err != nil {
+		c.Fatal(err)
+	}
+	runCmd := exec.Command(dockerBinary, "run", "--security-opt", "seccomp:"+tmpFile.Name(), "jess/unshare", "unshare", "--map-root-user", "--user", "sh", "-c", "whoami")
+	out, _, _ := runCommandWithOutput(runCmd)
+	if !strings.Contains(out, "Operation not permitted") {
+		c.Fatalf("expected unshare userns with seccomp profile denied to fail, got %s", out)
+	}
+}