1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Add default sysctls to allow ping sockets and privileged ports with no capabilities

Currently default capability CAP_NET_RAW allows users to open ICMP echo
sockets, and CAP_NET_BIND_SERVICE allows binding to ports under 1024.
Both of these are safe operations, and Linux now provides ways that
these can be set, per container, to be allowed without any capabilities
for non-root users. Enable these by default. Users can revert to the
previous behaviour by overriding the sysctl values explicitly.

Signed-off-by: Justin Cormack <justin.cormack@docker.com>
This commit is contained in:
Justin Cormack 2020-05-26 15:58:24 +01:00
parent 41ac6bef8d
commit dae652e2e5
No known key found for this signature in database
GPG key ID: 2D9CA5D475D0EE4E
3 changed files with 67 additions and 3 deletions

View file

@ -716,6 +716,14 @@ func WithMounts(daemon *Daemon, c *container.Container) coci.SpecOpts {
}
}
// sysctlExists reports whether the sysctl named s (in dotted form, e.g.
// "net.ipv4.ping_group_range") has a corresponding entry under /proc/sys.
// runc will error if we add any sysctl that does not actually exist, so the
// default ones must not be added when running on an old kernel.
//
// Any failure to stat the entry is treated as "does not exist".
func sysctlExists(s string) bool {
	// Sysctl names map onto /proc/sys by turning every dot into a path separator.
	procPath := filepath.Join("/proc", "sys", strings.ReplaceAll(s, ".", "/"))
	if _, err := os.Stat(procPath); err != nil {
		return false
	}
	return true
}
// WithCommonOptions sets common docker options
func WithCommonOptions(daemon *Daemon, c *container.Container) coci.SpecOpts {
return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
@ -768,6 +776,23 @@ func WithCommonOptions(daemon *Daemon, c *container.Container) coci.SpecOpts {
s.Hostname = c.Config.Hostname
setLinuxDomainname(c, s)
// Add default sysctls that are generally safe and useful; currently we
// grant the capabilities to allow these anyway. You can override if
// you want to restore the original behaviour.
// We do not set network sysctls if network namespace is host, or if we are
// joining an existing namespace, only if we create a new net namespace.
if c.HostConfig.NetworkMode.IsPrivate() {
// We cannot set up ping socket support in a user namespace
if !c.HostConfig.UsernsMode.IsPrivate() && sysctlExists("net.ipv4.ping_group_range") {
// allow unprivileged ICMP echo sockets without CAP_NET_RAW
s.Linux.Sysctl["net.ipv4.ping_group_range"] = "0 2147483647"
}
// allow opening any port less than 1024 without CAP_NET_BIND_SERVICE
if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"] = "0"
}
}
return nil
}
}

View file

@ -114,7 +114,9 @@ func TestSysctlOverride(t *testing.T) {
Domainname: "baz.cyphar.com",
},
HostConfig: &containertypes.HostConfig{
Sysctls: map[string]string{},
NetworkMode: "bridge",
Sysctls: map[string]string{},
UsernsMode: "host",
},
}
d := setupFakeDaemon(t, c)
@ -125,15 +127,51 @@ func TestSysctlOverride(t *testing.T) {
assert.NilError(t, err)
assert.Equal(t, s.Hostname, "foobar")
assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.Config.Domainname)
if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
}
if sysctlExists("net.ipv4.ping_group_range") {
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
}
// Set an explicit sysctl.
c.HostConfig.Sysctls["kernel.domainname"] = "foobar.net"
assert.Assert(t, c.HostConfig.Sysctls["kernel.domainname"] != c.Config.Domainname)
c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024"
s, err = d.createSpec(c)
assert.NilError(t, err)
assert.Equal(t, s.Hostname, "foobar")
assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.HostConfig.Sysctls["kernel.domainname"])
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"])
}
// TestSysctlOverrideHost checks that, with host networking, the generated
// spec carries no implicit network sysctls, while a sysctl set explicitly in
// the container's HostConfig is still passed through to the spec.
func TestSysctlOverrideHost(t *testing.T) {
	const (
		portStartKey = "net.ipv4.ip_unprivileged_port_start"
		pingRangeKey = "net.ipv4.ping_group_range"
	)

	ctr := &container.Container{
		Config: &containertypes.Config{},
		HostConfig: &containertypes.HostConfig{
			NetworkMode: "host",
			Sysctls:     map[string]string{},
			UsernsMode:  "host",
		},
	}
	fakeDaemon := setupFakeDaemon(t, ctr)
	defer cleanupFakeContainer(ctr)

	// With nothing configured, neither default network sysctl may appear.
	spec, err := fakeDaemon.createSpec(ctr)
	assert.NilError(t, err)
	assert.Equal(t, spec.Linux.Sysctl[portStartKey], "")
	assert.Equal(t, spec.Linux.Sysctl[pingRangeKey], "")

	// An explicitly requested sysctl must show up in the spec unchanged.
	ctr.HostConfig.Sysctls[portStartKey] = "1024"
	spec, err = fakeDaemon.createSpec(ctr)
	assert.NilError(t, err)
	assert.Equal(t, spec.Linux.Sysctl[portStartKey], ctr.HostConfig.Sysctls[portStartKey])
}
func TestGetSourceMount(t *testing.T) {

View file

@ -1252,12 +1252,13 @@ func (s *DockerSuite) TestUserNoEffectiveCapabilitiesNetBindService(c *testing.T
// test that a root user has default capability CAP_NET_BIND_SERVICE
dockerCmd(c, "run", "syscall-test", "socket-test")
// test that non root user does not have default capability CAP_NET_BIND_SERVICE
icmd.RunCommand(dockerBinary, "run", "--user", "1000:1000", "syscall-test", "socket-test").Assert(c, icmd.Expected{
// as we allow this via sysctl, also tweak the sysctl back to default
icmd.RunCommand(dockerBinary, "run", "--user", "1000:1000", "--sysctl", "net.ipv4.ip_unprivileged_port_start=1024", "syscall-test", "socket-test").Assert(c, icmd.Expected{
ExitCode: 1,
Err: "Permission denied",
})
// test that root user can drop default capability CAP_NET_BIND_SERVICE
icmd.RunCommand(dockerBinary, "run", "--cap-drop", "net_bind_service", "syscall-test", "socket-test").Assert(c, icmd.Expected{
icmd.RunCommand(dockerBinary, "run", "--cap-drop", "net_bind_service", "--sysctl", "net.ipv4.ip_unprivileged_port_start=1024", "syscall-test", "socket-test").Assert(c, icmd.Expected{
ExitCode: 1,
Err: "Permission denied",
})