Add support for setting sysctls

This patch will allow users to specify namespace specific "kernel parameters"
for running inside of a container.

Signed-off-by: Dan Walsh <dwalsh@redhat.com>
This commit is contained in:
Dan Walsh 2016-03-29 08:24:28 -04:00
parent 7b5a6844c2
commit 9caf7aeefd
12 changed files with 157 additions and 0 deletions

View File

@ -1671,6 +1671,7 @@ _docker_run() {
--shm-size
--stop-signal
--tmpfs
--sysctl
--ulimit
--user -u
--userns

View File

@ -644,6 +644,7 @@ __docker_subcommand() {
"($help)--privileged[Give extended privileges to this container]"
"($help)--read-only[Mount the container's root filesystem as read only]"
"($help)*--security-opt=[Security options]:security option: "
"($help)*--sysctl=-[sysctl options]:sysctl: "
"($help -t --tty)"{-t,--tty}"[Allocate a pseudo-tty]"
"($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users"
"($help)--tmpfs[mount tmpfs]"

View File

@ -611,6 +611,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, e
return nil, fmt.Errorf("linux runtime spec resources: %v", err)
}
s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
s.Linux.Sysctl = c.HostConfig.Sysctls
if err := setDevices(&s, c); err != nil {
return nil, fmt.Errorf("linux runtime spec devices: %v", err)
}

View File

@ -176,6 +176,7 @@ This section lists each version from latest to oldest. Each listing includes a
[Docker Remote API v1.21](docker_remote_api_v1.21.md) documentation
* `POST /containers/create` and `POST /containers/(id)/start` allow you to configure kernel parameters (sysctls) for use in the container.
* `GET /volumes` lists volumes from all volume drivers.
* `POST /volumes/create` to create a volume.
* `GET /volumes/(name)` get low-level information about a volume.

View File

@ -199,6 +199,7 @@ Create a container
"RestartPolicy": { "Name": "", "MaximumRetryCount": 0 },
"NetworkMode": "bridge",
"Devices": [],
"Sysctls": { "net.ipv4.ip_forward": "1" },
"Ulimits": [{}],
"LogConfig": { "Type": "json-file", "Config": {} },
"SecurityOpt": [],
@ -306,6 +307,10 @@ Json Parameters:
- **Devices** - A list of devices to add to the container specified as a JSON object in the
form
`{ "PathOnHost": "/dev/deviceName", "PathInContainer": "/dev/deviceName", "CgroupPermissions": "mrw"}`
- **Sysctls** - Kernel parameters (sysctls) to set in the container, specified as a JSON object
of the form `{ <name>: <value> }`, for example:
`{ "net.ipv4.ip_forward": "1" }`
- **Ulimits** - A list of ulimits to set in the container, specified as
`{ "Name": <name>, "Soft": <soft limit>, "Hard": <hard limit> }`, for example:
`Ulimits: { "Name": "nofile", "Soft": 1024, "Hard": 2048 }`
@ -426,6 +431,9 @@ Return low-level information on the container `id`
"Type": "json-file"
},
"SecurityOpt": null,
"Sysctls": {
"net.ipv4.ip_forward": "1"
},
"VolumesFrom": null,
"Ulimits": [{}],
"VolumeDriver": ""

View File

@ -82,6 +82,7 @@ Creates a new container.
--stop-signal="SIGTERM" Signal to stop a container
--shm-size=[] Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
--storage-opt=[] Set storage driver options per container
--sysctl=[] Configure namespaced kernel parameters at runtime
-t, --tty Allocate a pseudo-TTY
-u, --user="" Username or UID
--userns="" Container user namespace

View File

@ -84,6 +84,7 @@ parent = "smn_cli"
--sig-proxy=true Proxy received signals to the process
--stop-signal="SIGTERM" Signal to stop a container
--storage-opt=[] Set storage driver options per container
--sysctl=[] Configure namespaced kernel parameters at runtime
-t, --tty Allocate a pseudo-TTY
-u, --user="" Username or UID (format: <name|uid>[:<group|gid>])
--userns="" Container user namespace
@ -620,3 +621,30 @@ If you have set the `--exec-opt isolation=hyperv` option on the Docker `daemon`,
$ docker run -d --isolation default busybox top
$ docker run -d --isolation hyperv busybox top
```
### Configure namespaced kernel parameters (sysctls) at runtime
The `--sysctl` option sets namespaced kernel parameters (sysctls) in the
container. For example, to turn on IP forwarding in the container's
network namespace, run this command:
$ docker run --sysctl net.ipv4.ip_forward=1 someimage
> **Note**: Not all sysctls are namespaced. Docker does not support changing sysctls
> inside of a container that also modify the host system. As the kernel
> evolves we expect to see more sysctls become namespaced.
#### Currently supported sysctls
`IPC Namespace`:
kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
Sysctls beginning with fs.mqueue.*
If you use the `--ipc=host` option these sysctls will not be allowed.
`Network Namespace`:
Sysctls beginning with net.*
If you use the `--net=host` option these sysctls will not be allowed.

View File

@ -4,6 +4,7 @@ package main
import (
"bufio"
"encoding/json"
"fmt"
"io/ioutil"
"os"
@ -747,6 +748,37 @@ func (s *DockerSuite) TestRunTmpfsMounts(c *check.C) {
}
}
// TestRunSysctls runs containers with --sysctl net.ipv4.ip_forward set to 1
// and then 0, checking each time that the value read from
// /proc/sys/net/ipv4/ip_forward inside the container and the value stored
// under HostConfig.Sysctls both match. It finally runs with
// kernel.foobar=1 and expects the command output to contain "invalid value".
func (s *DockerSuite) TestRunSysctls(c *check.C) {
	testRequires(c, DaemonIsLinux)

	// One map is decoded into for both inspections below.
	recorded := make(map[string]string)

	// Forwarding enabled.
	procValue, _ := dockerCmd(c, "run", "--sysctl", "net.ipv4.ip_forward=1", "--name", "test", "busybox", "cat", "/proc/sys/net/ipv4/ip_forward")
	c.Assert(strings.TrimSpace(procValue), check.Equals, "1")

	inspected := inspectFieldJSON(c, "test", "HostConfig.Sysctls")
	c.Assert(json.Unmarshal([]byte(inspected), &recorded), check.IsNil)
	c.Assert(recorded["net.ipv4.ip_forward"], check.Equals, "1")

	// Forwarding disabled.
	procValue, _ = dockerCmd(c, "run", "--sysctl", "net.ipv4.ip_forward=0", "--name", "test1", "busybox", "cat", "/proc/sys/net/ipv4/ip_forward")
	c.Assert(strings.TrimSpace(procValue), check.Equals, "0")

	inspected = inspectFieldJSON(c, "test1", "HostConfig.Sysctls")
	c.Assert(json.Unmarshal([]byte(inspected), &recorded), check.IsNil)
	c.Assert(recorded["net.ipv4.ip_forward"], check.Equals, "0")

	// Only the output is examined here; the exit code and error are ignored.
	rejected, _, _ := runCommandWithOutput(exec.Command(dockerBinary, "run", "--sysctl", "kernel.foobar=1", "--name", "test2", "busybox", "cat", "/proc/sys/kernel/foobar"))
	if !strings.Contains(rejected, "invalid value") {
		c.Fatalf("expected --sysctl to fail, got %s", rejected)
	}
}
// TestRunSeccompProfileDenyUnshare checks that 'docker run --security-opt seccomp=/tmp/profile.json debian:jessie unshare' exits with operation not permitted.
func (s *DockerSuite) TestRunSeccompProfileDenyUnshare(c *check.C) {
testRequires(c, SameHostDaemon, seccompEnabled, NotArm, Apparmor)

View File

@ -67,6 +67,7 @@ docker-create - Create a new container
[**--storage-opt**[=*[]*]]
[**--stop-signal**[=*SIGNAL*]]
[**--shm-size**[=*[]*]]
[**--sysctl**[=*[]*]]
[**-t**|**--tty**]
[**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
[**-u**|**--user**[=*USER*]]
@ -336,6 +337,21 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
**--stop-signal**=*SIGTERM*
Signal to stop a container. Default is SIGTERM.
**--sysctl**=SYSCTL
Configure namespaced kernel parameters at runtime
IPC Namespace - current sysctls allowed:
kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
Sysctls beginning with fs.mqueue.*
Note: if you use the --ipc=host option, these sysctls will not be allowed.
Network Namespace - current sysctls allowed:
Sysctls beginning with net.*
Note: if you use the --net=host option, these sysctls will not be allowed.
**-t**, **--tty**=*true*|*false*
Allocate a pseudo-TTY. The default is *false*.

View File

@ -71,6 +71,7 @@ docker-run - Run a command in a new container
[**--stop-signal**[=*SIGNAL*]]
[**--shm-size**[=*[]*]]
[**--sig-proxy**[=*true*]]
[**--sysctl**[=*[]*]]
[**-t**|**--tty**]
[**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
[**-u**|**--user**[=*USER*]]
@ -492,6 +493,21 @@ its root filesystem mounted as read only prohibiting any writes.
`number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m`(megabytes), or `g` (gigabytes).
If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
**--sysctl**=SYSCTL
Configure namespaced kernel parameters at runtime
IPC Namespace - current sysctls allowed:
kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
Sysctls beginning with fs.mqueue.*
If you use the `--ipc=host` option these sysctls will not be allowed.
Network Namespace - current sysctls allowed:
Sysctls beginning with net.*
If you use the `--net=host` option these sysctls will not be allowed.
**--sig-proxy**=*true*|*false*
Proxy received signals to the process (non-TTY mode only). SIGCHLD, SIGSTOP, and SIGKILL are not proxied. The default is *true*.
@ -955,6 +971,23 @@ $ docker run -d --isolation default busybox top
$ docker run -d --isolation hyperv busybox top
```
## Setting Namespaced Kernel Parameters (Sysctls)
The `--sysctl` option sets namespaced kernel parameters (sysctls) in the
container. For example, to turn on IP forwarding in the container's
network namespace, run this command:
$ docker run --sysctl net.ipv4.ip_forward=1 someimage
Note:
Not all sysctls are namespaced. Docker does not support changing sysctls
inside of a container that also modify the host system. As the kernel
evolves we expect to see more sysctls become namespaced.
See the definition of the `--sysctl` option above for the current list of
supported sysctls.
# HISTORY
April 2014, Originally compiled by William Henry (whenry at redhat dot com)
based on docker.com source material and internal work.

View File

@ -240,3 +240,35 @@ func ValidateLabel(val string) (string, error) {
}
return val, nil
}
// ValidateSysctl validates a sysctl given in "name=value" form and returns it
// unchanged when the name is accepted.
//
// A name is accepted when it is one of the kernel.* IPC sysctls listed below,
// or when it starts with "net." or "fs.mqueue.". Only the text before the
// first "=" is inspected; the value itself is not checked.
//
// On failure the returned string is empty. The error distinguishes a missing
// "=" separator from a name that is not whitelisted.
func ValidateSysctl(val string) (string, error) {
	sep := strings.Index(val, "=")
	if sep < 0 {
		// No separator is a format problem; reporting it as a whitelist
		// failure would be misleading for otherwise accepted names.
		return "", fmt.Errorf("sysctl '%s' is invalid: expected format name=value", val)
	}
	name := val[:sep]

	// Sysctls accepted by exact name.
	switch name {
	case "kernel.msgmax",
		"kernel.msgmnb",
		"kernel.msgmni",
		"kernel.sem",
		"kernel.shmall",
		"kernel.shmmax",
		"kernel.shmmni",
		"kernel.shm_rmid_forced":
		return val, nil
	}

	// Sysctls accepted by prefix.
	for _, prefix := range []string{"net.", "fs.mqueue."} {
		if strings.HasPrefix(name, prefix) {
			return val, nil
		}
	}

	return "", fmt.Errorf("sysctl '%s' is not whitelisted", val)
}

View File

@ -42,6 +42,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
flDevices = opts.NewListOpts(ValidateDevice)
flUlimits = NewUlimitOpt(nil)
flSysctls = opts.NewMapOpts(nil, opts.ValidateSysctl)
flPublish = opts.NewListOpts(nil)
flExpose = opts.NewListOpts(nil)
@ -127,6 +128,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
cmd.Var(&flSecurityOpt, []string{"-security-opt"}, "Security Options")
cmd.Var(&flStorageOpt, []string{"-storage-opt"}, "Set storage driver options per container")
cmd.Var(flUlimits, []string{"-ulimit"}, "Ulimit options")
cmd.Var(flSysctls, []string{"-sysctl"}, "Sysctl options")
cmd.Var(&flLoggingOpts, []string{"-log-opt"}, "Log driver options")
cmd.Require(flag.Min, 1)
@ -430,6 +432,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
ShmSize: shmSize,
Resources: resources,
Tmpfs: tmpfs,
Sysctls: flSysctls.GetAll(),
}
// When allocating stdin in attached mode, close stdin at client disconnect