mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Add support for setting sysctls
This patch will allow users to specify namespace specific "kernel parameters" for running inside of a container. Signed-off-by: Dan Walsh <dwalsh@redhat.com>
This commit is contained in:
parent
7b5a6844c2
commit
9caf7aeefd
12 changed files with 157 additions and 0 deletions
|
@ -1671,6 +1671,7 @@ _docker_run() {
|
|||
--shm-size
|
||||
--stop-signal
|
||||
--tmpfs
|
||||
--sysctl
|
||||
--ulimit
|
||||
--user -u
|
||||
--userns
|
||||
|
|
|
@ -644,6 +644,7 @@ __docker_subcommand() {
|
|||
"($help)--privileged[Give extended privileges to this container]"
|
||||
"($help)--read-only[Mount the container's root filesystem as read only]"
|
||||
"($help)*--security-opt=[Security options]:security option: "
|
||||
"($help)*--sysctl=-[sysctl options]:sysctl: "
|
||||
"($help -t --tty)"{-t,--tty}"[Allocate a pseudo-tty]"
|
||||
"($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users"
|
||||
"($help)--tmpfs[mount tmpfs]"
|
||||
|
|
|
@ -611,6 +611,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, e
|
|||
return nil, fmt.Errorf("linux runtime spec resources: %v", err)
|
||||
}
|
||||
s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
|
||||
s.Linux.Sysctl = c.HostConfig.Sysctls
|
||||
if err := setDevices(&s, c); err != nil {
|
||||
return nil, fmt.Errorf("linux runtime spec devices: %v", err)
|
||||
}
|
||||
|
|
|
@ -176,6 +176,7 @@ This section lists each version from latest to oldest. Each listing includes a
|
|||
|
||||
[Docker Remote API v1.21](docker_remote_api_v1.21.md) documentation
|
||||
|
||||
* `POST /containers/create` and `POST /containers/(id)/start` allow you to configure kernel parameters (sysctls) for use in the container.
|
||||
* `GET /volumes` lists volumes from all volume drivers.
|
||||
* `POST /volumes/create` to create a volume.
|
||||
* `GET /volumes/(name)` get low-level information about a volume.
|
||||
|
|
|
@ -199,6 +199,7 @@ Create a container
|
|||
"RestartPolicy": { "Name": "", "MaximumRetryCount": 0 },
|
||||
"NetworkMode": "bridge",
|
||||
"Devices": [],
|
||||
"Sysctls": { "net.ipv4.ip_forward": "1" },
|
||||
"Ulimits": [{}],
|
||||
"LogConfig": { "Type": "json-file", "Config": {} },
|
||||
"SecurityOpt": [],
|
||||
|
@ -306,6 +307,10 @@ Json Parameters:
|
|||
- **Devices** - A list of devices to add to the container specified as a JSON object in the
|
||||
form
|
||||
`{ "PathOnHost": "/dev/deviceName", "PathInContainer": "/dev/deviceName", "CgroupPermissions": "mrw"}`
|
||||
- **Sysctls** - A list of kernel parameters (sysctls) to set in the container, specified as
|
||||
`{ <name>: <Value> }`, for example:
|
||||
`{ "net.ipv4.ip_forward": "1" }`
|
||||
|
||||
- **Ulimits** - A list of ulimits to set in the container, specified as
|
||||
`{ "Name": <name>, "Soft": <soft limit>, "Hard": <hard limit> }`, for example:
|
||||
`Ulimits: { "Name": "nofile", "Soft": 1024, "Hard": 2048 }`
|
||||
|
@ -426,6 +431,9 @@ Return low-level information on the container `id`
|
|||
"Type": "json-file"
|
||||
},
|
||||
"SecurityOpt": null,
|
||||
"Sysctls": {
|
||||
"net.ipv4.ip_forward": "1"
|
||||
},
|
||||
"VolumesFrom": null,
|
||||
"Ulimits": [{}],
|
||||
"VolumeDriver": ""
|
||||
|
|
|
@ -82,6 +82,7 @@ Creates a new container.
|
|||
--stop-signal="SIGTERM" Signal to stop a container
|
||||
--shm-size=[] Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
|
||||
--storage-opt=[] Set storage driver options per container
|
||||
--sysctl=[]                   Configure namespaced kernel parameters at runtime
|
||||
-t, --tty Allocate a pseudo-TTY
|
||||
-u, --user="" Username or UID
|
||||
--userns="" Container user namespace
|
||||
|
|
|
@ -84,6 +84,7 @@ parent = "smn_cli"
|
|||
--sig-proxy=true Proxy received signals to the process
|
||||
--stop-signal="SIGTERM" Signal to stop a container
|
||||
--storage-opt=[] Set storage driver options per container
|
||||
--sysctl=[]                   Configure namespaced kernel parameters at runtime
|
||||
-t, --tty Allocate a pseudo-TTY
|
||||
-u, --user="" Username or UID (format: <name|uid>[:<group|gid>])
|
||||
--userns="" Container user namespace
|
||||
|
@ -620,3 +621,30 @@ If you have set the `--exec-opt isolation=hyperv` option on the Docker `daemon`,
|
|||
$ docker run -d --isolation default busybox top
|
||||
$ docker run -d --isolation hyperv busybox top
|
||||
```
|
||||
|
||||
### Configure namespaced kernel parameters (sysctls) at runtime
|
||||
|
||||
The `--sysctl` option sets namespaced kernel parameters (sysctls) in the
|
||||
container. For example, to turn on IP forwarding in the container's
|
||||
network namespace, run this command:
|
||||
|
||||
$ docker run --sysctl net.ipv4.ip_forward=1 someimage
|
||||
|
||||
|
||||
> **Note**: Not all sysctls are namespaced. Docker does not support changing sysctls
|
||||
> inside of a container that also modify the host system. As the kernel
|
||||
> evolves we expect to see more sysctls become namespaced.
|
||||
|
||||
#### Currently supported sysctls
|
||||
|
||||
`IPC Namespace`:
|
||||
|
||||
kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
|
||||
Sysctls beginning with fs.mqueue.*
|
||||
|
||||
If you use the `--ipc=host` option these sysctls will not be allowed.
|
||||
|
||||
`Network Namespace`:
|
||||
Sysctls beginning with net.*
|
||||
|
||||
If you use the `--net=host` option, these sysctls will not be allowed.
|
||||
|
|
|
@ -4,6 +4,7 @@ package main
|
|||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
@ -747,6 +748,37 @@ func (s *DockerSuite) TestRunTmpfsMounts(c *check.C) {
|
|||
}
|
||||
}
|
||||
|
||||
func (s *DockerSuite) TestRunSysctls(c *check.C) {
|
||||
|
||||
testRequires(c, DaemonIsLinux)
|
||||
var err error
|
||||
|
||||
out, _ := dockerCmd(c, "run", "--sysctl", "net.ipv4.ip_forward=1", "--name", "test", "busybox", "cat", "/proc/sys/net/ipv4/ip_forward")
|
||||
c.Assert(strings.TrimSpace(out), check.Equals, "1")
|
||||
|
||||
out = inspectFieldJSON(c, "test", "HostConfig.Sysctls")
|
||||
|
||||
sysctls := make(map[string]string)
|
||||
err = json.Unmarshal([]byte(out), &sysctls)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(sysctls["net.ipv4.ip_forward"], check.Equals, "1")
|
||||
|
||||
out, _ = dockerCmd(c, "run", "--sysctl", "net.ipv4.ip_forward=0", "--name", "test1", "busybox", "cat", "/proc/sys/net/ipv4/ip_forward")
|
||||
c.Assert(strings.TrimSpace(out), check.Equals, "0")
|
||||
|
||||
out = inspectFieldJSON(c, "test1", "HostConfig.Sysctls")
|
||||
|
||||
err = json.Unmarshal([]byte(out), &sysctls)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(sysctls["net.ipv4.ip_forward"], check.Equals, "0")
|
||||
|
||||
runCmd := exec.Command(dockerBinary, "run", "--sysctl", "kernel.foobar=1", "--name", "test2", "busybox", "cat", "/proc/sys/kernel/foobar")
|
||||
out, _, _ = runCommandWithOutput(runCmd)
|
||||
if !strings.Contains(out, "invalid value") {
|
||||
c.Fatalf("expected --sysctl to fail, got %s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunSeccompProfileDenyUnshare checks that 'docker run --security-opt seccomp=/tmp/profile.json debian:jessie unshare' exits with operation not permitted.
|
||||
func (s *DockerSuite) TestRunSeccompProfileDenyUnshare(c *check.C) {
|
||||
testRequires(c, SameHostDaemon, seccompEnabled, NotArm, Apparmor)
|
||||
|
|
|
@ -67,6 +67,7 @@ docker-create - Create a new container
|
|||
[**--storage-opt**[=*[]*]]
|
||||
[**--stop-signal**[=*SIGNAL*]]
|
||||
[**--shm-size**[=*[]*]]
|
||||
[**--sysctl**[=*[]*]]
|
||||
[**-t**|**--tty**]
|
||||
[**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
|
||||
[**-u**|**--user**[=*USER*]]
|
||||
|
@ -336,6 +337,21 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
|
|||
**--stop-signal**=*SIGTERM*
|
||||
Signal to stop a container. Default is SIGTERM.
|
||||
|
||||
**--sysctl**=SYSCTL
|
||||
Configure namespaced kernel parameters at runtime
|
||||
|
||||
IPC Namespace - current sysctls allowed:
|
||||
|
||||
kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
|
||||
Sysctls beginning with fs.mqueue.*
|
||||
|
||||
Note: if you use --ipc=host, these sysctls will not be allowed.
|
||||
|
||||
Network Namespace - current sysctls allowed:
|
||||
Sysctls beginning with net.*
|
||||
|
||||
Note: if you use --net=host, these sysctls will not be allowed.
|
||||
|
||||
**-t**, **--tty**=*true*|*false*
|
||||
Allocate a pseudo-TTY. The default is *false*.
|
||||
|
||||
|
|
|
@ -71,6 +71,7 @@ docker-run - Run a command in a new container
|
|||
[**--stop-signal**[=*SIGNAL*]]
|
||||
[**--shm-size**[=*[]*]]
|
||||
[**--sig-proxy**[=*true*]]
|
||||
[**--sysctl**[=*[]*]]
|
||||
[**-t**|**--tty**]
|
||||
[**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
|
||||
[**-u**|**--user**[=*USER*]]
|
||||
|
@ -492,6 +493,21 @@ its root filesystem mounted as read only prohibiting any writes.
|
|||
`number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m`(megabytes), or `g` (gigabytes).
|
||||
If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
|
||||
|
||||
**--sysctl**=SYSCTL
|
||||
Configure namespaced kernel parameters at runtime
|
||||
|
||||
IPC Namespace - current sysctls allowed:
|
||||
|
||||
kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
|
||||
Sysctls beginning with fs.mqueue.*
|
||||
|
||||
If you use the `--ipc=host` option these sysctls will not be allowed.
|
||||
|
||||
Network Namespace - current sysctls allowed:
|
||||
Sysctls beginning with net.*
|
||||
|
||||
If you use the `--net=host` option these sysctls will not be allowed.
|
||||
|
||||
**--sig-proxy**=*true*|*false*
|
||||
Proxy received signals to the process (non-TTY mode only). SIGCHLD, SIGSTOP, and SIGKILL are not proxied. The default is *true*.
|
||||
|
||||
|
@ -955,6 +971,23 @@ $ docker run -d --isolation default busybox top
|
|||
$ docker run -d --isolation hyperv busybox top
|
||||
```
|
||||
|
||||
## Setting Namespaced Kernel Parameters (Sysctls)
|
||||
|
||||
The `--sysctl` option sets namespaced kernel parameters (sysctls) in the
|
||||
container. For example, to turn on IP forwarding in the container's
|
||||
network namespace, run this command:
|
||||
|
||||
$ docker run --sysctl net.ipv4.ip_forward=1 someimage
|
||||
|
||||
Note:
|
||||
|
||||
Not all sysctls are namespaced. Docker does not support changing sysctls
|
||||
inside of a container that also modify the host system. As the kernel
|
||||
evolves we expect to see more sysctls become namespaced.
|
||||
|
||||
See the definition of the `--sysctl` option above for the current list of
|
||||
supported sysctls.
|
||||
|
||||
# HISTORY
|
||||
April 2014, Originally compiled by William Henry (whenry at redhat dot com)
|
||||
based on docker.com source material and internal work.
|
||||
|
|
32
opts/opts.go
32
opts/opts.go
|
@ -240,3 +240,35 @@ func ValidateLabel(val string) (string, error) {
|
|||
}
|
||||
return val, nil
|
||||
}
|
||||
|
||||
// ValidateSysctl validates a sysctl setting and returns it unchanged.
//
// val must have the form "name=value". The name is accepted when it is one of
// the explicitly listed kernel.* IPC parameters or when it starts with "net."
// or "fs.mqueue.". The value part is not inspected.
//
// It returns an empty string and an error when val has no "=" separator or
// when the name is not in the allowed set.
func ValidateSysctl(val string) (string, error) {
	// Only the first "=" separates name from value; the value may itself
	// contain "=" characters.
	parts := strings.SplitN(val, "=", 2)
	if len(parts) < 2 {
		return "", fmt.Errorf("sysctl '%s' must be in the form name=value", val)
	}
	name := parts[0]

	// Individually allowed parameters.
	switch name {
	case "kernel.msgmax",
		"kernel.msgmnb",
		"kernel.msgmni",
		"kernel.sem",
		"kernel.shmall",
		"kernel.shmmax",
		"kernel.shmmni",
		"kernel.shm_rmid_forced":
		return val, nil
	}

	// Whole subtrees that are allowed.
	for _, prefix := range []string{"net.", "fs.mqueue."} {
		if strings.HasPrefix(name, prefix) {
			return val, nil
		}
	}

	return "", fmt.Errorf("sysctl '%s' is not whitelisted", val)
}
|
||||
|
|
|
@ -42,6 +42,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
|
|||
flDevices = opts.NewListOpts(ValidateDevice)
|
||||
|
||||
flUlimits = NewUlimitOpt(nil)
|
||||
flSysctls = opts.NewMapOpts(nil, opts.ValidateSysctl)
|
||||
|
||||
flPublish = opts.NewListOpts(nil)
|
||||
flExpose = opts.NewListOpts(nil)
|
||||
|
@ -127,6 +128,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
|
|||
cmd.Var(&flSecurityOpt, []string{"-security-opt"}, "Security Options")
|
||||
cmd.Var(&flStorageOpt, []string{"-storage-opt"}, "Set storage driver options per container")
|
||||
cmd.Var(flUlimits, []string{"-ulimit"}, "Ulimit options")
|
||||
cmd.Var(flSysctls, []string{"-sysctl"}, "Sysctl options")
|
||||
cmd.Var(&flLoggingOpts, []string{"-log-opt"}, "Log driver options")
|
||||
|
||||
cmd.Require(flag.Min, 1)
|
||||
|
@ -430,6 +432,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
|
|||
ShmSize: shmSize,
|
||||
Resources: resources,
|
||||
Tmpfs: tmpfs,
|
||||
Sysctls: flSysctls.GetAll(),
|
||||
}
|
||||
|
||||
// When allocating stdin in attached mode, close stdin at client disconnect
|
||||
|
|
Loading…
Reference in a new issue