mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Add default sysctls to allow ping sockets and privileged ports with no capabilities
Currently, the default capability CAP_NET_RAW allows users to open ICMP echo sockets, and CAP_NET_BIND_SERVICE allows binding to ports under 1024. Both of these are safe operations, and Linux now provides ways that these can be set, per container, to be allowed without any capabilities for non-root users. Enable these by default. Users can revert to the previous behaviour by overriding the sysctl values explicitly. Signed-off-by: Justin Cormack <justin.cormack@docker.com>
This commit is contained in:
parent
41ac6bef8d
commit
dae652e2e5
3 changed files with 67 additions and 3 deletions
|
@ -716,6 +716,14 @@ func WithMounts(daemon *Daemon, c *container.Container) coci.SpecOpts {
|
|||
}
|
||||
}
|
||||
|
||||
// sysctlExists reports whether the named sysctl is available on the running
// kernel. runc will error if we add any sysctl that does not actually exist,
// so the default ones must not be added when running on an old kernel.
//
// The dotted name s (for example "net.ipv4.ping_group_range") is mapped to
// its file under /proc/sys by turning every "." into a path separator. The
// sysctl is reported as present only when os.Stat on that path succeeds; any
// stat error is treated as "does not exist".
func sysctlExists(s string) bool {
	f := filepath.Join("/proc", "sys", strings.ReplaceAll(s, ".", "/"))
	_, err := os.Stat(f)
	return err == nil
}
|
||||
|
||||
// WithCommonOptions sets common docker options
|
||||
func WithCommonOptions(daemon *Daemon, c *container.Container) coci.SpecOpts {
|
||||
return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
|
||||
|
@ -768,6 +776,23 @@ func WithCommonOptions(daemon *Daemon, c *container.Container) coci.SpecOpts {
|
|||
s.Hostname = c.Config.Hostname
|
||||
setLinuxDomainname(c, s)
|
||||
|
||||
// Add default sysctls that are generally safe and useful; currently we
|
||||
// grant the capabilities to allow these anyway. You can override if
|
||||
// you want to restore the original behaviour.
|
||||
// We do not set network sysctls if network namespace is host, or if we are
|
||||
// joining an existing namespace, only if we create a new net namespace.
|
||||
if c.HostConfig.NetworkMode.IsPrivate() {
|
||||
// We cannot set up ping socket support in a user namespace
|
||||
if !c.HostConfig.UsernsMode.IsPrivate() && sysctlExists("net.ipv4.ping_group_range") {
|
||||
// allow unprivileged ICMP echo sockets without CAP_NET_RAW
|
||||
s.Linux.Sysctl["net.ipv4.ping_group_range"] = "0 2147483647"
|
||||
}
|
||||
// allow opening any port less than 1024 without CAP_NET_BIND_SERVICE
|
||||
if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
|
||||
s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"] = "0"
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
|
|
@ -114,7 +114,9 @@ func TestSysctlOverride(t *testing.T) {
|
|||
Domainname: "baz.cyphar.com",
|
||||
},
|
||||
HostConfig: &containertypes.HostConfig{
|
||||
Sysctls: map[string]string{},
|
||||
NetworkMode: "bridge",
|
||||
Sysctls: map[string]string{},
|
||||
UsernsMode: "host",
|
||||
},
|
||||
}
|
||||
d := setupFakeDaemon(t, c)
|
||||
|
@ -125,15 +127,51 @@ func TestSysctlOverride(t *testing.T) {
|
|||
assert.NilError(t, err)
|
||||
assert.Equal(t, s.Hostname, "foobar")
|
||||
assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.Config.Domainname)
|
||||
if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
|
||||
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
|
||||
}
|
||||
if sysctlExists("net.ipv4.ping_group_range") {
|
||||
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
|
||||
}
|
||||
|
||||
// Set an explicit sysctl.
|
||||
c.HostConfig.Sysctls["kernel.domainname"] = "foobar.net"
|
||||
assert.Assert(t, c.HostConfig.Sysctls["kernel.domainname"] != c.Config.Domainname)
|
||||
c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024"
|
||||
|
||||
s, err = d.createSpec(c)
|
||||
assert.NilError(t, err)
|
||||
assert.Equal(t, s.Hostname, "foobar")
|
||||
assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.HostConfig.Sysctls["kernel.domainname"])
|
||||
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"])
|
||||
}
|
||||
|
||||
// TestSysctlOverrideHost ensures that any implicit network sysctls are not set
|
||||
// with host networking
|
||||
func TestSysctlOverrideHost(t *testing.T) {
|
||||
c := &container.Container{
|
||||
Config: &containertypes.Config{},
|
||||
HostConfig: &containertypes.HostConfig{
|
||||
NetworkMode: "host",
|
||||
Sysctls: map[string]string{},
|
||||
UsernsMode: "host",
|
||||
},
|
||||
}
|
||||
d := setupFakeDaemon(t, c)
|
||||
defer cleanupFakeContainer(c)
|
||||
|
||||
// Ensure that the implicit sysctl is not set
|
||||
s, err := d.createSpec(c)
|
||||
assert.NilError(t, err)
|
||||
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "")
|
||||
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "")
|
||||
|
||||
// Set an explicit sysctl.
|
||||
c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024"
|
||||
|
||||
s, err = d.createSpec(c)
|
||||
assert.NilError(t, err)
|
||||
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"])
|
||||
}
|
||||
|
||||
func TestGetSourceMount(t *testing.T) {
|
||||
|
|
|
@ -1252,12 +1252,13 @@ func (s *DockerSuite) TestUserNoEffectiveCapabilitiesNetBindService(c *testing.T
|
|||
// test that a root user has default capability CAP_NET_BIND_SERVICE
|
||||
dockerCmd(c, "run", "syscall-test", "socket-test")
|
||||
// test that non root user does not have default capability CAP_NET_BIND_SERVICE
|
||||
icmd.RunCommand(dockerBinary, "run", "--user", "1000:1000", "syscall-test", "socket-test").Assert(c, icmd.Expected{
|
||||
// as we allow this via sysctl, also tweak the sysctl back to default
|
||||
icmd.RunCommand(dockerBinary, "run", "--user", "1000:1000", "--sysctl", "net.ipv4.ip_unprivileged_port_start=1024", "syscall-test", "socket-test").Assert(c, icmd.Expected{
|
||||
ExitCode: 1,
|
||||
Err: "Permission denied",
|
||||
})
|
||||
// test that root user can drop default capability CAP_NET_BIND_SERVICE
|
||||
icmd.RunCommand(dockerBinary, "run", "--cap-drop", "net_bind_service", "syscall-test", "socket-test").Assert(c, icmd.Expected{
|
||||
icmd.RunCommand(dockerBinary, "run", "--cap-drop", "net_bind_service", "--sysctl", "net.ipv4.ip_unprivileged_port_start=1024", "syscall-test", "socket-test").Assert(c, icmd.Expected{
|
||||
ExitCode: 1,
|
||||
Err: "Permission denied",
|
||||
})
|
||||
|
|
Loading…
Reference in a new issue