diff --git a/daemon/graphdriver/overlay2/mount.go b/daemon/graphdriver/overlay2/mount.go index dcd7c01490..eff0f781bf 100644 --- a/daemon/graphdriver/overlay2/mount.go +++ b/daemon/graphdriver/overlay2/mount.go @@ -50,6 +50,13 @@ func mountFrom(dir, device, target, mType string, flags uintptr, label string) e output := bytes.NewBuffer(nil) cmd.Stdout = output cmd.Stderr = output + + // reexec.Command() sets cmd.SysProcAttr.Pdeathsig on Linux, which + // causes the started process to be signaled when the creating OS thread + // dies. Ensure that the reexec is not prematurely signaled. See + // https://go.dev/issue/27505 for more information. + runtime.LockOSThread() + defer runtime.UnlockOSThread() if err := cmd.Start(); err != nil { w.Close() return fmt.Errorf("mountfrom error on re-exec cmd: %v", err) diff --git a/libcontainerd/supervisor/remote_daemon.go b/libcontainerd/supervisor/remote_daemon.go index 5213854ac8..a81bff75b2 100644 --- a/libcontainerd/supervisor/remote_daemon.go +++ b/libcontainerd/supervisor/remote_daemon.go @@ -6,6 +6,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strconv" "strings" "time" @@ -200,18 +201,34 @@ func (r *remote) startContainerd() error { cmd.Env = append(cmd.Env, e) } } - if err := cmd.Start(); err != nil { - return err - } - r.daemonWaitCh = make(chan struct{}) + startedCh := make(chan error) go func() { + // On Linux, when cmd.SysProcAttr.Pdeathsig is set, + // the signal is sent to the subprocess when the creating thread + // terminates. The runtime terminates a thread if a goroutine + // exits while locked to it. Prevent the containerd process + // from getting killed prematurely by ensuring that the thread + // used to start it remains alive until it or the daemon process + // exits. See https://go.dev/issue/27505 for more details. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + err := cmd.Start() + startedCh <- err + if err != nil { + return + } + + r.daemonWaitCh = make(chan struct{}) // Reap our child when needed if err := cmd.Wait(); err != nil { r.logger.WithError(err).Errorf("containerd did not exit successfully") } close(r.daemonWaitCh) }() + if err := <-startedCh; err != nil { + return err + } r.daemonPid = cmd.Process.Pid diff --git a/libnetwork/portmapper/proxy_linux.go b/libnetwork/portmapper/proxy_linux.go index fa0d11f884..7df0ab5157 100644 --- a/libnetwork/portmapper/proxy_linux.go +++ b/libnetwork/portmapper/proxy_linux.go @@ -6,6 +6,7 @@ import ( "net" "os" "os/exec" + "runtime" "strconv" "syscall" "time" @@ -37,16 +38,18 @@ func newProxyCommand(proto string, hostIP net.IP, hostPort int, containerIP net. Path: path, Args: args, SysProcAttr: &syscall.SysProcAttr{ - Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the daemon process dies + Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505) }, }, + wait: make(chan error, 1), }, nil } // proxyCommand wraps an exec.Cmd to run the userland TCP and UDP // proxies as separate processes. type proxyCommand struct { - cmd *exec.Cmd + cmd *exec.Cmd + wait chan error } func (p *proxyCommand) Start() error { @@ -56,7 +59,29 @@ func (p *proxyCommand) Start() error { } defer r.Close() p.cmd.ExtraFiles = []*os.File{w} - if err := p.cmd.Start(); err != nil { + + // As p.cmd.SysProcAttr.Pdeathsig is set, the signal will be sent to the + // process when the OS thread on which p.cmd.Start() was executed dies. + // If the thread is allowed to be released back into the goroutine + // thread pool, the thread could get terminated at any time if a + // goroutine gets scheduled onto it which calls runtime.LockOSThread() + // and exits without a matching number of runtime.UnlockOSThread() + // calls. Ensure that the thread from which Start() is called stays + // alive until the proxy or the daemon process exits to prevent the + // proxy from getting terminated early. See https://go.dev/issue/27505 + // for more details. + started := make(chan error) + go func() { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + err := p.cmd.Start() + started <- err + if err != nil { + return + } + p.wait <- p.cmd.Wait() + }() + if err := <-started; err != nil { return err } w.Close() @@ -92,7 +117,7 @@ func (p *proxyCommand) Stop() error { if err := p.cmd.Process.Signal(os.Interrupt); err != nil { return err } - return p.cmd.Wait() + return <-p.wait } return nil } diff --git a/pkg/chrootarchive/archive_unix.go b/pkg/chrootarchive/archive_unix.go index b3a8ae1135..13bb82a2e4 100644 --- a/pkg/chrootarchive/archive_unix.go +++ b/pkg/chrootarchive/archive_unix.go @@ -95,6 +95,12 @@ func invokeUnpack(decompressedArchive io.Reader, dest string, options *archive.T cmd.Stdout = output cmd.Stderr = output + // reexec.Command() sets cmd.SysProcAttr.Pdeathsig on Linux, which + // causes the started process to be signaled when the creating OS thread + // dies. Ensure that the reexec is not prematurely signaled. See + // https://go.dev/issue/27505 for more information. + runtime.LockOSThread() + defer runtime.UnlockOSThread() if err := cmd.Start(); err != nil { w.Close() return fmt.Errorf("Untar error on re-exec cmd: %v", err) @@ -188,15 +194,27 @@ func invokePack(srcPath string, options *archive.TarOptions, root string) (io.Re return nil, errors.Wrap(err, "error getting options pipe for tar process") } - if err := cmd.Start(); err != nil { - return nil, errors.Wrap(err, "tar error on re-exec cmd") - } - + started := make(chan error) go func() { + // reexec.Command() sets cmd.SysProcAttr.Pdeathsig on Linux, + // which causes the started process to be signaled when the + // creating OS thread dies. Ensure that the subprocess is not + // prematurely signaled. See https://go.dev/issue/27505 for more + // information. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + if err := cmd.Start(); err != nil { + started <- err + return + } + close(started) err := cmd.Wait() err = errors.Wrapf(err, "error processing tar file: %s", errBuff) tarW.CloseWithError(err) }() + if err := <-started; err != nil { + return nil, errors.Wrap(err, "tar error on re-exec cmd") + } if err := json.NewEncoder(stdin).Encode(options); err != nil { stdin.Close() diff --git a/pkg/chrootarchive/diff_unix.go b/pkg/chrootarchive/diff_unix.go index e1bf74d1d5..554f934a2b 100644 --- a/pkg/chrootarchive/diff_unix.go +++ b/pkg/chrootarchive/diff_unix.go @@ -115,6 +115,12 @@ func applyLayerHandler(dest string, layer io.Reader, options *archive.TarOptions outBuf, errBuf := new(bytes.Buffer), new(bytes.Buffer) cmd.Stdout, cmd.Stderr = outBuf, errBuf + // reexec.Command() sets cmd.SysProcAttr.Pdeathsig on Linux, which + // causes the started process to be signaled when the creating OS thread + // dies. Ensure that the reexec is not prematurely signaled. See + // https://go.dev/issue/27505 for more information. + runtime.LockOSThread() + defer runtime.UnlockOSThread() if err = cmd.Run(); err != nil { return 0, fmt.Errorf("ApplyLayer %s stdout: %s stderr: %s", err, outBuf, errBuf) } diff --git a/pkg/reexec/command_linux.go b/pkg/reexec/command_linux.go index efea71794f..d7ec3d606f 100644 --- a/pkg/reexec/command_linux.go +++ b/pkg/reexec/command_linux.go @@ -17,6 +17,11 @@ func Self() string { // SysProcAttr.Pdeathsig to SIGTERM. // This will use the in-memory version (/proc/self/exe) of the current binary, // it is thus safe to delete or replace the on-disk binary (os.Args[0]). +// +// As SysProcAttr.Pdeathsig is set, the signal will be sent to the process when +// the OS thread which created the process dies. It is the caller's +// responsibility to ensure that the creating thread is not terminated +// prematurely. See https://go.dev/issue/27505 for more details. func Command(args ...string) *exec.Cmd { return &exec.Cmd{ Path: Self(),