1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Add support for setting sysctls

This patch will allow users to specify namespace specific "kernel parameters"
for running inside of a container.

Signed-off-by: Dan Walsh <dwalsh@redhat.com>
This commit is contained in:
Dan Walsh 2016-03-29 08:24:28 -04:00
parent 7b5a6844c2
commit 9caf7aeefd
12 changed files with 157 additions and 0 deletions

View file

@ -1671,6 +1671,7 @@ _docker_run() {
--shm-size --shm-size
--stop-signal --stop-signal
--tmpfs --tmpfs
--sysctl
--ulimit --ulimit
--user -u --user -u
--userns --userns

View file

@ -644,6 +644,7 @@ __docker_subcommand() {
"($help)--privileged[Give extended privileges to this container]" "($help)--privileged[Give extended privileges to this container]"
"($help)--read-only[Mount the container's root filesystem as read only]" "($help)--read-only[Mount the container's root filesystem as read only]"
"($help)*--security-opt=[Security options]:security option: " "($help)*--security-opt=[Security options]:security option: "
"($help)*--sysctl=-[sysctl options]:sysctl: "
"($help -t --tty)"{-t,--tty}"[Allocate a pseudo-tty]" "($help -t --tty)"{-t,--tty}"[Allocate a pseudo-tty]"
"($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users" "($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users"
"($help)--tmpfs[mount tmpfs]" "($help)--tmpfs[mount tmpfs]"

View file

@ -611,6 +611,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, e
return nil, fmt.Errorf("linux runtime spec resources: %v", err) return nil, fmt.Errorf("linux runtime spec resources: %v", err)
} }
s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
s.Linux.Sysctl = c.HostConfig.Sysctls
if err := setDevices(&s, c); err != nil { if err := setDevices(&s, c); err != nil {
return nil, fmt.Errorf("linux runtime spec devices: %v", err) return nil, fmt.Errorf("linux runtime spec devices: %v", err)
} }

View file

@ -176,6 +176,7 @@ This section lists each version from latest to oldest. Each listing includes a
[Docker Remote API v1.21](docker_remote_api_v1.21.md) documentation [Docker Remote API v1.21](docker_remote_api_v1.21.md) documentation
* `POST /containers/create` and `POST /containers/(id)/start` allow you to configure kernel parameters (sysctls) for use in the container.
* `GET /volumes` lists volumes from all volume drivers. * `GET /volumes` lists volumes from all volume drivers.
* `POST /volumes/create` to create a volume. * `POST /volumes/create` to create a volume.
* `GET /volumes/(name)` get low-level information about a volume. * `GET /volumes/(name)` get low-level information about a volume.

View file

@ -199,6 +199,7 @@ Create a container
"RestartPolicy": { "Name": "", "MaximumRetryCount": 0 }, "RestartPolicy": { "Name": "", "MaximumRetryCount": 0 },
"NetworkMode": "bridge", "NetworkMode": "bridge",
"Devices": [], "Devices": [],
"Sysctls": { "net.ipv4.ip_forward": "1" },
"Ulimits": [{}], "Ulimits": [{}],
"LogConfig": { "Type": "json-file", "Config": {} }, "LogConfig": { "Type": "json-file", "Config": {} },
"SecurityOpt": [], "SecurityOpt": [],
@ -306,6 +307,10 @@ Json Parameters:
- **Devices** - A list of devices to add to the container specified as a JSON object in the - **Devices** - A list of devices to add to the container specified as a JSON object in the
form form
`{ "PathOnHost": "/dev/deviceName", "PathInContainer": "/dev/deviceName", "CgroupPermissions": "mrw"}` `{ "PathOnHost": "/dev/deviceName", "PathInContainer": "/dev/deviceName", "CgroupPermissions": "mrw"}`
- **Sysctls** - Kernel parameters (sysctls) to set in the container, specified as a JSON object of the form
`{ <name>: <value> }`, for example:
`{ "net.ipv4.ip_forward": "1" }`
- **Ulimits** - A list of ulimits to set in the container, specified as - **Ulimits** - A list of ulimits to set in the container, specified as
`{ "Name": <name>, "Soft": <soft limit>, "Hard": <hard limit> }`, for example: `{ "Name": <name>, "Soft": <soft limit>, "Hard": <hard limit> }`, for example:
`Ulimits: { "Name": "nofile", "Soft": 1024, "Hard": 2048 }` `Ulimits: { "Name": "nofile", "Soft": 1024, "Hard": 2048 }`
@ -426,6 +431,9 @@ Return low-level information on the container `id`
"Type": "json-file" "Type": "json-file"
}, },
"SecurityOpt": null, "SecurityOpt": null,
"Sysctls": {
"net.ipv4.ip_forward": "1"
},
"VolumesFrom": null, "VolumesFrom": null,
"Ulimits": [{}], "Ulimits": [{}],
"VolumeDriver": "" "VolumeDriver": ""

View file

@ -82,6 +82,7 @@ Creates a new container.
--stop-signal="SIGTERM" Signal to stop a container --stop-signal="SIGTERM" Signal to stop a container
--shm-size=[] Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`. --shm-size=[] Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
--storage-opt=[] Set storage driver options per container --storage-opt=[] Set storage driver options per container
--sysctl=[] Configure namespaced kernel parameters at runtime
-t, --tty Allocate a pseudo-TTY -t, --tty Allocate a pseudo-TTY
-u, --user="" Username or UID -u, --user="" Username or UID
--userns="" Container user namespace --userns="" Container user namespace

View file

@ -84,6 +84,7 @@ parent = "smn_cli"
--sig-proxy=true Proxy received signals to the process --sig-proxy=true Proxy received signals to the process
--stop-signal="SIGTERM" Signal to stop a container --stop-signal="SIGTERM" Signal to stop a container
--storage-opt=[] Set storage driver options per container --storage-opt=[] Set storage driver options per container
--sysctl=[] Configure namespaced kernel parameters at runtime
-t, --tty Allocate a pseudo-TTY -t, --tty Allocate a pseudo-TTY
-u, --user="" Username or UID (format: <name|uid>[:<group|gid>]) -u, --user="" Username or UID (format: <name|uid>[:<group|gid>])
--userns="" Container user namespace --userns="" Container user namespace
@ -620,3 +621,30 @@ If you have set the `--exec-opt isolation=hyperv` option on the Docker `daemon`,
$ docker run -d --isolation default busybox top $ docker run -d --isolation default busybox top
$ docker run -d --isolation hyperv busybox top $ docker run -d --isolation hyperv busybox top
``` ```
### Configure namespaced kernel parameters (sysctls) at runtime
The `--sysctl` option sets namespaced kernel parameters (sysctls) in the
container. For example, to turn on IP forwarding in the container's
network namespace, run this command:
$ docker run --sysctl net.ipv4.ip_forward=1 someimage
> **Note**: Not all sysctls are namespaced. Docker does not support changing sysctls
> inside a container when the change would also modify the host system. As the kernel
> evolves we expect to see more sysctls become namespaced.
#### Currently supported sysctls
`IPC Namespace`:
kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
Sysctls beginning with fs.mqueue.*
If you use the `--ipc=host` option these sysctls will not be allowed.
`Network Namespace`:
Sysctls beginning with net.*
If you use the `--net=host` option these sysctls will not be allowed.

View file

@ -4,6 +4,7 @@ package main
import ( import (
"bufio" "bufio"
"encoding/json"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os" "os"
@ -747,6 +748,37 @@ func (s *DockerSuite) TestRunTmpfsMounts(c *check.C) {
} }
} }
// TestRunSysctls checks that a value passed with --sysctl is visible inside
// the container under /proc/sys, is reported by inspect in
// HostConfig.Sysctls, and that a sysctl outside the allowed set
// (kernel.foobar) is refused by the CLI.
func (s *DockerSuite) TestRunSysctls(c *check.C) {
	testRequires(c, DaemonIsLinux)

	// The decoded map is shared across cases, mirroring a single
	// long-lived map that each inspect result is unmarshalled into.
	recorded := make(map[string]string)

	cases := []struct {
		container string
		setting   string
		want      string
	}{
		{container: "test", setting: "net.ipv4.ip_forward=1", want: "1"},
		{container: "test1", setting: "net.ipv4.ip_forward=0", want: "0"},
	}
	for _, tc := range cases {
		// The value read from inside the container must match what was requested.
		seen, _ := dockerCmd(c, "run", "--sysctl", tc.setting, "--name", tc.container, "busybox", "cat", "/proc/sys/net/ipv4/ip_forward")
		c.Assert(strings.TrimSpace(seen), check.Equals, tc.want)

		// The same value must be stored in the container's host config.
		inspected := inspectFieldJSON(c, tc.container, "HostConfig.Sysctls")
		decodeErr := json.Unmarshal([]byte(inspected), &recorded)
		c.Assert(decodeErr, check.IsNil)
		c.Assert(recorded["net.ipv4.ip_forward"], check.Equals, tc.want)
	}

	// kernel.foobar is not an allowed sysctl; only the command output is
	// inspected here, the exit code and error are deliberately not checked.
	rejected := exec.Command(dockerBinary, "run", "--sysctl", "kernel.foobar=1", "--name", "test2", "busybox", "cat", "/proc/sys/kernel/foobar")
	output, _, _ := runCommandWithOutput(rejected)
	if !strings.Contains(output, "invalid value") {
		c.Fatalf("expected --sysctl to fail, got %s", output)
	}
}
// TestRunSeccompProfileDenyUnshare checks that 'docker run --security-opt seccomp=/tmp/profile.json debian:jessie unshare' exits with operation not permitted. // TestRunSeccompProfileDenyUnshare checks that 'docker run --security-opt seccomp=/tmp/profile.json debian:jessie unshare' exits with operation not permitted.
func (s *DockerSuite) TestRunSeccompProfileDenyUnshare(c *check.C) { func (s *DockerSuite) TestRunSeccompProfileDenyUnshare(c *check.C) {
testRequires(c, SameHostDaemon, seccompEnabled, NotArm, Apparmor) testRequires(c, SameHostDaemon, seccompEnabled, NotArm, Apparmor)

View file

@ -67,6 +67,7 @@ docker-create - Create a new container
[**--storage-opt**[=*[]*]] [**--storage-opt**[=*[]*]]
[**--stop-signal**[=*SIGNAL*]] [**--stop-signal**[=*SIGNAL*]]
[**--shm-size**[=*[]*]] [**--shm-size**[=*[]*]]
[**--sysctl**[=*[]*]]
[**-t**|**--tty**] [**-t**|**--tty**]
[**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]] [**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
[**-u**|**--user**[=*USER*]] [**-u**|**--user**[=*USER*]]
@ -336,6 +337,21 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
**--stop-signal**=*SIGTERM* **--stop-signal**=*SIGTERM*
Signal to stop a container. Default is SIGTERM. Signal to stop a container. Default is SIGTERM.
**--sysctl**=SYSCTL
Configure namespaced kernel parameters at runtime
IPC Namespace - current sysctls allowed:
kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
Sysctls beginning with fs.mqueue.*
Note: if you use --ipc=host, these sysctls will not be allowed.
Network Namespace - current sysctls allowed:
Sysctls beginning with net.*
Note: if you use --net=host, these sysctls will not be allowed.
**-t**, **--tty**=*true*|*false* **-t**, **--tty**=*true*|*false*
Allocate a pseudo-TTY. The default is *false*. Allocate a pseudo-TTY. The default is *false*.

View file

@ -71,6 +71,7 @@ docker-run - Run a command in a new container
[**--stop-signal**[=*SIGNAL*]] [**--stop-signal**[=*SIGNAL*]]
[**--shm-size**[=*[]*]] [**--shm-size**[=*[]*]]
[**--sig-proxy**[=*true*]] [**--sig-proxy**[=*true*]]
[**--sysctl**[=*[]*]]
[**-t**|**--tty**] [**-t**|**--tty**]
[**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]] [**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
[**-u**|**--user**[=*USER*]] [**-u**|**--user**[=*USER*]]
@ -492,6 +493,21 @@ its root filesystem mounted as read only prohibiting any writes.
`number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m`(megabytes), or `g` (gigabytes). `number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m`(megabytes), or `g` (gigabytes).
If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`. If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
**--sysctl**=SYSCTL
Configure namespaced kernel parameters at runtime
IPC Namespace - current sysctls allowed:
kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
Sysctls beginning with fs.mqueue.*
If you use the `--ipc=host` option these sysctls will not be allowed.
Network Namespace - current sysctls allowed:
Sysctls beginning with net.*
If you use the `--net=host` option these sysctls will not be allowed.
**--sig-proxy**=*true*|*false* **--sig-proxy**=*true*|*false*
Proxy received signals to the process (non-TTY mode only). SIGCHLD, SIGSTOP, and SIGKILL are not proxied. The default is *true*. Proxy received signals to the process (non-TTY mode only). SIGCHLD, SIGSTOP, and SIGKILL are not proxied. The default is *true*.
@ -955,6 +971,23 @@ $ docker run -d --isolation default busybox top
$ docker run -d --isolation hyperv busybox top $ docker run -d --isolation hyperv busybox top
``` ```
## Setting Namespaced Kernel Parameters (Sysctls)
The `--sysctl` option sets namespaced kernel parameters (sysctls) in the
container. For example, to turn on IP forwarding in the container's
network namespace, run this command:
$ docker run --sysctl net.ipv4.ip_forward=1 someimage
Note:
Not all sysctls are namespaced. Docker does not support changing sysctls
inside a container when the change would also modify the host system. As the kernel
evolves we expect to see more sysctls become namespaced.
See the definition of the `--sysctl` option above for the current list of
supported sysctls.
# HISTORY # HISTORY
April 2014, Originally compiled by William Henry (whenry at redhat dot com) April 2014, Originally compiled by William Henry (whenry at redhat dot com)
based on docker.com source material and internal work. based on docker.com source material and internal work.

View file

@ -240,3 +240,35 @@ func ValidateLabel(val string) (string, error) {
} }
return val, nil return val, nil
} }
// ValidateSysctl validates a sysctl setting of the form "name=value" and
// returns it unchanged when the name is allowed.
//
// A name is allowed when it is one of the kernel.* entries listed below or
// when it starts with "net." or "fs.mqueue.". Only the name is checked; the
// value part is not inspected.
//
// An error is returned when val contains no "=" separator, or when the name
// is not in the allowed set. On error the returned string is empty.
func ValidateSysctl(val string) (string, error) {
	validSysctlMap := map[string]bool{
		"kernel.msgmax":          true,
		"kernel.msgmnb":          true,
		"kernel.msgmni":          true,
		"kernel.sem":             true,
		"kernel.shmall":          true,
		"kernel.shmmax":          true,
		"kernel.shmmni":          true,
		"kernel.shm_rmid_forced": true,
	}
	validSysctlPrefixes := []string{
		"net.",
		"fs.mqueue.",
	}

	// Split on the first "=" only: everything after it belongs to the value
	// and may itself contain "=".
	arr := strings.SplitN(val, "=", 2)
	if len(arr) < 2 {
		// A missing separator is a format problem, not a whitelist problem;
		// report it as such so an allowed name is not called "not whitelisted".
		return "", fmt.Errorf("sysctl '%s' is not in the form name=value", val)
	}
	name := arr[0]

	if validSysctlMap[name] {
		return val, nil
	}
	for _, vp := range validSysctlPrefixes {
		if strings.HasPrefix(name, vp) {
			return val, nil
		}
	}
	return "", fmt.Errorf("sysctl '%s' is not whitelisted", val)
}

View file

@ -42,6 +42,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
flDevices = opts.NewListOpts(ValidateDevice) flDevices = opts.NewListOpts(ValidateDevice)
flUlimits = NewUlimitOpt(nil) flUlimits = NewUlimitOpt(nil)
flSysctls = opts.NewMapOpts(nil, opts.ValidateSysctl)
flPublish = opts.NewListOpts(nil) flPublish = opts.NewListOpts(nil)
flExpose = opts.NewListOpts(nil) flExpose = opts.NewListOpts(nil)
@ -127,6 +128,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
cmd.Var(&flSecurityOpt, []string{"-security-opt"}, "Security Options") cmd.Var(&flSecurityOpt, []string{"-security-opt"}, "Security Options")
cmd.Var(&flStorageOpt, []string{"-storage-opt"}, "Set storage driver options per container") cmd.Var(&flStorageOpt, []string{"-storage-opt"}, "Set storage driver options per container")
cmd.Var(flUlimits, []string{"-ulimit"}, "Ulimit options") cmd.Var(flUlimits, []string{"-ulimit"}, "Ulimit options")
cmd.Var(flSysctls, []string{"-sysctl"}, "Sysctl options")
cmd.Var(&flLoggingOpts, []string{"-log-opt"}, "Log driver options") cmd.Var(&flLoggingOpts, []string{"-log-opt"}, "Log driver options")
cmd.Require(flag.Min, 1) cmd.Require(flag.Min, 1)
@ -430,6 +432,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
ShmSize: shmSize, ShmSize: shmSize,
Resources: resources, Resources: resources,
Tmpfs: tmpfs, Tmpfs: tmpfs,
Sysctls: flSysctls.GetAll(),
} }
// When allocating stdin in attached mode, close stdin at client disconnect // When allocating stdin in attached mode, close stdin at client disconnect