Add init process for zombie fighting

This adds a small C binary for fighting zombies.  It is mounted under
`/dev/init` and is prepended to the args specified by the user.  You
enable it via a daemon flag, `dockerd --init`, as it is disabled by
default for backwards compatibility.

You can also override the daemon option or specify this on a per
container basis with `docker run --init=true|false`.

You can test this by running a process like the one below as pid 1 in a
container and observing the extra zombie that appears in the container
while it is running.

```c
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Zombie reproducer. The process forks a child, which in turn forks a
 * grandchild. Nobody in this program ever calls wait(), so exited
 * descendants are never reaped by it. Intended to be run as pid 1 of a
 * container (see the surrounding description) so the leftover zombie can
 * be observed while the top-level process is still sleeping.
 */
int main(int argc, char ** argv) {
	pid_t pid = fork();
	if (pid == 0) {
		/* first child: spawn a grandchild and do not wait for it */
		pid = fork();
		if (pid == 0) {
			/* grandchild: exit right away */
			exit(0);
		}
		/* first child stays alive for 3 seconds after forking, then exits
		 * without reaping the grandchild */
		sleep(3);
		exit(0);
	}
	/* top-level process: report the child's pid (or -1 if fork failed) and
	 * stay alive for 20 seconds without reaping anything */
	printf("got pid %d and exited\n", pid);
	sleep(20);
}
```

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2016-06-27 14:38:47 -07:00
parent d8c9951707
commit ee3ac3aa66
22 changed files with 112 additions and 68 deletions

View File

@ -255,6 +255,16 @@ RUN set -x \
&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
&& rm -rf "$GOPATH"
ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
RUN set -x \
&& export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
&& cd "$GOPATH/grimes" \
&& git checkout -q "$GRIMES_COMMIT" \
&& make \
&& cp init /usr/local/bin/docker-init \
&& rm -rf "$GOPATH"
# Wrap all commands in the "docker-in-docker" script to allow nested containers
ENTRYPOINT ["hack/dind"]

View File

@ -198,6 +198,16 @@ RUN set -x \
&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
&& rm -rf "$GOPATH"
ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
RUN set -x \
&& export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
&& cd "$GOPATH/grimes" \
&& git checkout -q "$GRIMES_COMMIT" \
&& make \
&& cp init /usr/local/bin/docker-init \
&& rm -rf "$GOPATH"
# Wrap all commands in the "docker-in-docker" script to allow nested containers
ENTRYPOINT ["hack/dind"]

View File

@ -196,6 +196,16 @@ RUN set -x \
&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
&& rm -rf "$GOPATH"
ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
RUN set -x \
&& export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
&& cd "$GOPATH/grimes" \
&& git checkout -q "$GRIMES_COMMIT" \
&& make \
&& cp init /usr/local/bin/docker-init \
&& rm -rf "$GOPATH"
ENTRYPOINT ["hack/dind"]
# Upload docker source

View File

@ -216,6 +216,16 @@ RUN set -x \
&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
&& rm -rf "$GOPATH"
ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
RUN set -x \
&& export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
&& cd "$GOPATH/grimes" \
&& git checkout -q "$GRIMES_COMMIT" \
&& make \
&& cp init /usr/local/bin/docker-init \
&& rm -rf "$GOPATH"
# Wrap all commands in the "docker-in-docker" script to allow nested containers
ENTRYPOINT ["hack/dind"]

View File

@ -208,6 +208,16 @@ RUN set -x \
&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
&& rm -rf "$GOPATH"
ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
RUN set -x \
&& export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
&& cd "$GOPATH/grimes" \
&& git checkout -q "$GRIMES_COMMIT" \
&& make \
&& cp init /usr/local/bin/docker-init \
&& rm -rf "$GOPATH"
# Wrap all commands in the "docker-in-docker" script to allow nested containers
ENTRYPOINT ["hack/dind"]

View File

@ -80,6 +80,16 @@ RUN set -x \
&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
&& rm -rf "$GOPATH"
ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
RUN set -x \
&& export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
&& cd "$GOPATH/grimes" \
&& git checkout -q "$GRIMES_COMMIT" \
&& make \
&& cp init /usr/local/bin/docker-init \
&& rm -rf "$GOPATH"
ENV AUTO_GOPATH 1
WORKDIR /usr/src/docker
COPY . /usr/src/docker

View File

@ -321,6 +321,9 @@ type HostConfig struct {
// Mounts specs used by the container
Mounts []mount.Mount `json:",omitempty"`
// Run a custom init inside the container, if null, use the daemon's configured settings
Init *bool `json:",omitempty"`
}
// Box specifies height and width dimensions. Used for sizing of a console.

View File

@ -35,6 +35,7 @@ type Config struct {
Runtimes map[string]types.Runtime `json:"runtimes,omitempty"`
DefaultRuntime string `json:"default-runtime,omitempty"`
OOMScoreAdjust int `json:"oom-score-adjust,omitempty"`
Init bool `json:"init,omitempty"`
}
// bridgeConfig stores all the bridge driver specific
@ -91,6 +92,7 @@ func (config *Config) InstallFlags(flags *pflag.FlagSet) {
flags.Var(runconfigopts.NewNamedRuntimeOpt("runtimes", &config.Runtimes, stockRuntimeName), "add-runtime", "Register an additional OCI compatible runtime")
flags.StringVar(&config.DefaultRuntime, "default-runtime", stockRuntimeName, "Default OCI runtime for containers")
flags.IntVar(&config.OOMScoreAdjust, "oom-score-adjust", -500, "Set the oom_score_adj for the daemon")
flags.BoolVar(&config.Init, "init", false, "Run an init in the container to forward signals and reap processes")
config.attachExperimentalFlags(flags)
}

View File

@ -4,6 +4,7 @@ import (
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
@ -585,6 +586,26 @@ func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container)
cwd = "/"
}
s.Process.Args = append([]string{c.Path}, c.Args...)
// only add the custom init if it is specified and the container is running in its
// own private pid namespace. It does not make sense to add if it is running in the
// host namespace or another container's pid namespace where we already have an init
if c.HostConfig.PidMode.IsPrivate() {
if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
(c.HostConfig.Init == nil && daemon.configStore.Init) {
s.Process.Args = append([]string{"/dev/init", c.Path}, c.Args...)
path, err := exec.LookPath("docker-init")
if err != nil {
return err
}
s.Mounts = append(s.Mounts, specs.Mount{
Destination: "/dev/init",
Type: "bind",
Source: path,
Options: []string{"bind", "ro"},
})
}
}
s.Process.Cwd = cwd
s.Process.Env = c.CreateDaemonEnvironment(linkedEnv)
s.Process.Terminal = c.Config.Tty

View File

@ -48,6 +48,7 @@ Options:
-H, --host=[] Daemon socket(s) to connect to
--help Print usage
--icc=true Enable inter-container communication
--init Run an init inside containers to forward signals and reap processes
--insecure-registry=[] Enable insecure registry communication
--ip=0.0.0.0 Default IP when binding container ports
--ip-forward=true Enable net.ipv4.ip_forward
@ -1140,6 +1141,7 @@ This is a full example of the allowed configuration options on Linux:
"group": "",
"cgroup-parent": "",
"default-ulimits": {},
"init": false,
"ipv6": false,
"iptables": false,
"ip-forward": false,

View File

@ -255,7 +255,7 @@ bundle() {
source "$SCRIPTDIR/make/$bundle" "$@"
}
copy_containerd() {
copy_binaries() {
dir="$1"
# Add nested executables to bundle dir so we have complete set of
# them available, but only if the native OS/ARCH is the same as the
@ -263,7 +263,7 @@ copy_containerd() {
if [ "$(go env GOOS)/$(go env GOARCH)" == "$(go env GOHOSTOS)/$(go env GOHOSTARCH)" ]; then
if [ -x /usr/local/bin/docker-runc ]; then
echo "Copying nested executables into $dir"
for file in containerd containerd-shim containerd-ctr runc; do
for file in containerd containerd-shim containerd-ctr runc init; do
cp `which "docker-$file"` "$dir/"
if [ "$2" == "hash" ]; then
hash_files "$dir/docker-$file"

View File

@ -7,3 +7,4 @@ DOCKER_CONTAINERD_BINARY_NAME='docker-containerd'
DOCKER_CONTAINERD_CTR_BINARY_NAME='docker-containerd-ctr'
DOCKER_CONTAINERD_SHIM_BINARY_NAME='docker-containerd-shim'
DOCKER_PROXY_BINARY_NAME='docker-proxy'
DOCKER_INIT_BINARY_NAME='docker-init'

View File

@ -12,5 +12,5 @@ set -e
export BINARY_SHORT_NAME="$DOCKER_PROXY_BINARY_NAME"
export SOURCE_PATH='./vendor/src/github.com/docker/libnetwork/cmd/proxy'
source "${MAKEDIR}/.binary"
copy_containerd "$DEST" 'hash'
copy_binaries "$DEST" 'hash'
)

View File

@ -12,4 +12,5 @@ rm -rf "$DEST"
install_binary "${DEST}/${DOCKER_CONTAINERD_CTR_BINARY_NAME}"
install_binary "${DEST}/${DOCKER_CONTAINERD_SHIM_BINARY_NAME}"
install_binary "${DEST}/${DOCKER_PROXY_BINARY_NAME}"
install_binary "${DEST}/${DOCKER_INIT_BINARY_NAME}"
)

View File

@ -53,8 +53,8 @@ for d in "$CROSS/"*/*; do
cp -L "$d/$PROXY_BINARY_FULLNAME" "$TAR_PATH/${DOCKER_PROXY_BINARY_NAME}${BINARY_EXTENSION}"
fi
# copy over all the containerd binaries
copy_containerd $TAR_PATH
# copy over all the extra binaries
copy_binaries $TAR_PATH
if [ "$IS_TAR" == "true" ]; then
echo "Creating tgz from $BUILD_PATH and naming it $TGZ"

View File

@ -78,40 +78,6 @@ func (s *DockerSuite) TestEventsUntag(c *check.C) {
}
}
// TestEventsContainerFailStartDie runs a container with a command that cannot
// be executed and then inspects the events recorded for that container.
// Windows starts the container regardless, so both "start" and "die" events
// are expected there; on other platforms neither event is expected.
func (s *DockerSuite) TestEventsContainerFailStartDie(c *check.C) {
	_, _, err := dockerCmdWithError("run", "--name", "testeventdie", "busybox", "blerg")
	c.Assert(err, checker.NotNil, check.Commentf("Container run with command blerg should have failed, but it did not"))

	out, _ := dockerCmd(c, "events", "--since=0", "--until", daemonUnixTime(c))
	events := strings.Split(strings.TrimSpace(out), "\n")
	c.Assert(len(events), checker.GreaterOrEqualThan, 1) //Missing expected event

	actions := eventActionsByIDAndType(c, events, "testeventdie", "container")

	// Record which of the two lifecycle actions were seen for the container.
	var sawStart, sawDie bool
	for _, action := range actions {
		if action == "start" {
			sawStart = true
		} else if action == "die" {
			sawDie = true
		}
	}

	// Everything except Windows refuses to start the container, so no
	// start/die events should have been emitted there.
	if daemonPlatform != "windows" {
		c.Assert(sawStart, checker.False, check.Commentf("Start event not expected: %v\n%v", actions, events))
		c.Assert(sawDie, checker.False, check.Commentf("Die event not expected: %v\n%v", actions, events))
		return
	}

	// Windows platform is different from Linux, it will start container whatever
	// so Windows can get start/die event but Linux can't
	c.Assert(sawStart, checker.True, check.Commentf("Start event not found: %v\n%v", actions, events))
	c.Assert(sawDie, checker.True, check.Commentf("Die event not found: %v\n%v", actions, events))
}
func (s *DockerSuite) TestEventsLimit(c *check.C) {
var waitGroup sync.WaitGroup
errChan := make(chan error, 17)

View File

@ -2403,30 +2403,6 @@ func (s *DockerSuite) TestRunExposePort(c *check.C) {
c.Assert(out, checker.Contains, "invalid range format for --expose")
}
// TestRunUnknownCommand creates a container whose command does not exist,
// starts it, and checks that the recorded exit code is not "0".
func (s *DockerSuite) TestRunUnknownCommand(c *check.C) {
	createOut, _, _ := dockerCmdWithStdoutStderr(c, "create", "busybox", "/bin/nada")
	cID := strings.TrimSpace(createOut)

	_, _, err := dockerCmdWithError("start", cID)

	// Windows and Linux are different here by architectural design. Linux will
	// fail to start the container, so an error is expected. Windows will
	// successfully start the container, and once started attempt to execute
	// the command which will fail.
	switch daemonPlatform {
	case "windows":
		// Wait for it to exit.
		waitExited(cID, 30*time.Second)
		c.Assert(err, check.IsNil)
	default:
		c.Assert(err, check.NotNil)
	}

	if rc := inspectField(c, cID, "State.ExitCode"); rc == "0" {
		c.Fatalf("ExitCode(%v) cannot be 0", rc)
	}
}
func (s *DockerSuite) TestRunModeIpcHost(c *check.C) {
// Not applicable on Windows as uses Unix-specific capabilities
testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)

View File

@ -1234,11 +1234,11 @@ func (s *DockerSuite) TestRunPidsLimit(c *check.C) {
testRequires(c, pidsLimit)
file := "/sys/fs/cgroup/pids/pids.max"
out, _ := dockerCmd(c, "run", "--name", "skittles", "--pids-limit", "2", "busybox", "cat", file)
c.Assert(strings.TrimSpace(out), checker.Equals, "2")
out, _ := dockerCmd(c, "run", "--name", "skittles", "--pids-limit", "4", "busybox", "cat", file)
c.Assert(strings.TrimSpace(out), checker.Equals, "4")
out = inspectField(c, "skittles", "HostConfig.PidsLimit")
c.Assert(out, checker.Equals, "2", check.Commentf("setting the pids limit failed"))
c.Assert(out, checker.Equals, "4", check.Commentf("setting the pids limit failed"))
}
func (s *DockerSuite) TestRunPrivilegedAllowedDevices(c *check.C) {

View File

@ -34,6 +34,7 @@ dockerd - Enable daemon mode
[**-H**|**--host**[=*[]*]]
[**--help**]
[**--icc**[=*true*]]
[**--init**[=*false*]]
[**--insecure-registry**[=*[]*]]
[**--ip**[=*0.0.0.0*]]
[**--ip-forward**[=*true*]]
@ -166,6 +167,9 @@ unix://[/path/to/socket] to use.
**--icc**=*true*|*false*
Allow unrestricted inter\-container and Docker daemon host communication. If disabled, containers can still be linked together using the **--link** option (see **docker-run(1)**). Default is true.
**--init**
Run an init process inside containers for signal forwarding and process reaping.
**--insecure-registry**=[]
Enable insecure registry communication, i.e., enable un-encrypted and/or untrusted communication.

View File

@ -59,7 +59,6 @@ func DefaultSpec() specs.Spec {
Options: []string{"nosuid", "noexec", "nodev"},
},
}
s.Process.Capabilities = []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",

View File

@ -2,10 +2,11 @@ package opts
import (
"fmt"
fopts "github.com/docker/docker/opts"
"net"
"os"
"strings"
fopts "github.com/docker/docker/opts"
)
// ValidateAttach validates that the specified string is a valid attach option.

View File

@ -103,6 +103,7 @@ type ContainerOptions struct {
healthRetries int
runtime string
autoRemove bool
init bool
Image string
Args []string
@ -243,6 +244,8 @@ func AddFlags(flags *pflag.FlagSet) *ContainerOptions {
flags.StringVar(&copts.shmSize, "shm-size", "", "Size of /dev/shm, default value is 64MB")
flags.StringVar(&copts.utsMode, "uts", "", "UTS namespace to use")
flags.StringVar(&copts.runtime, "runtime", "", "Runtime to use for this container")
flags.BoolVar(&copts.init, "init", false, "Run an init inside the container that forwards signals and reaps processes")
return copts
}
@ -593,6 +596,11 @@ func Parse(flags *pflag.FlagSet, copts *ContainerOptions) (*container.Config, *c
Runtime: copts.runtime,
}
// only set this value if the user provided the flag, else it should default to nil
if flags.Changed("init") {
hostConfig.Init = &copts.init
}
// When allocating stdin in attached mode, close stdin at client disconnect
if config.OpenStdin && config.AttachStdin {
config.StdinOnce = true