mirror of
				https://github.com/moby/moby.git
				synced 2022-11-09 12:21:53 -05:00 
			
		
		
		
	Merge pull request #27737 from justincormack/no-ambient-1.13
Revert ambient capabilities for 1.13
This commit is contained in:
		
						commit
						cac13fc131
					
				
					 22 changed files with 1145 additions and 42 deletions
				
			
		| 
						 | 
				
			
			@ -6,6 +6,10 @@ WORKDIR /usr/src/
 | 
			
		|||
 | 
			
		||||
RUN gcc -g -Wall -static userns.c -o /usr/bin/userns-test \
 | 
			
		||||
	&& gcc -g -Wall -static ns.c -o /usr/bin/ns-test \
 | 
			
		||||
	&& gcc -g -Wall -static acct.c -o /usr/bin/acct-test
 | 
			
		||||
	&& gcc -g -Wall -static acct.c -o /usr/bin/acct-test \
 | 
			
		||||
	&& gcc -g -Wall -static setuid.c -o /usr/bin/setuid-test \
 | 
			
		||||
	&& gcc -g -Wall -static setgid.c -o /usr/bin/setgid-test \
 | 
			
		||||
	&& gcc -g -Wall -static socket.c -o /usr/bin/socket-test \
 | 
			
		||||
	&& gcc -g -Wall -static raw.c -o /usr/bin/raw-test
 | 
			
		||||
 | 
			
		||||
RUN [ "$(uname -m)" = "x86_64" ] && gcc -s -m32 -nostdlib exit32.s -o /usr/bin/exit32-test || true
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										14
									
								
								contrib/syscall-test/raw.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								contrib/syscall-test/raw.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,14 @@
 | 
			
		|||
#include <stdio.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <sys/socket.h>
 | 
			
		||||
#include <netinet/ip.h>
 | 
			
		||||
#include <netinet/udp.h>
 | 
			
		||||
 | 
			
		||||
int main() {
 | 
			
		||||
	if (socket(PF_INET, SOCK_RAW, IPPROTO_UDP) == -1) {
 | 
			
		||||
		perror("socket");
 | 
			
		||||
		return 1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										11
									
								
								contrib/syscall-test/setgid.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								contrib/syscall-test/setgid.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,11 @@
 | 
			
		|||
#include <sys/types.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
 | 
			
		||||
int main() {
 | 
			
		||||
	if (setgid(1) == -1) {
 | 
			
		||||
		perror("setgid");
 | 
			
		||||
		return 1;
 | 
			
		||||
	}
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										11
									
								
								contrib/syscall-test/setuid.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								contrib/syscall-test/setuid.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,11 @@
 | 
			
		|||
#include <sys/types.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
 | 
			
		||||
int main() {
 | 
			
		||||
	if (setuid(1) == -1) {
 | 
			
		||||
		perror("setuid");
 | 
			
		||||
		return 1;
 | 
			
		||||
	}
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										30
									
								
								contrib/syscall-test/socket.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								contrib/syscall-test/socket.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,30 @@
 | 
			
		|||
#include <stdio.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <sys/types.h>
 | 
			
		||||
#include <sys/socket.h>
 | 
			
		||||
#include <netinet/in.h>
 | 
			
		||||
#include <arpa/inet.h>
 | 
			
		||||
 | 
			
		||||
int main() {
 | 
			
		||||
	int s;
 | 
			
		||||
	struct sockaddr_in sin;
 | 
			
		||||
 | 
			
		||||
	s = socket(AF_INET, SOCK_STREAM, 0);
 | 
			
		||||
	if (s == -1) {
 | 
			
		||||
		perror("socket");
 | 
			
		||||
		return 1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	sin.sin_family = AF_INET;
 | 
			
		||||
	sin.sin_addr.s_addr = INADDR_ANY;
 | 
			
		||||
	sin.sin_port = htons(80);
 | 
			
		||||
 | 
			
		||||
	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1) {
 | 
			
		||||
		perror("bind");
 | 
			
		||||
		return 1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	close(s);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -3,7 +3,7 @@ set -e
 | 
			
		|||
set -x
 | 
			
		||||
 | 
			
		||||
TOMLV_COMMIT=9baf8a8a9f2ed20a8e54160840c492f937eeaf9a
 | 
			
		||||
RUNC_COMMIT=02f8fa7863dd3f82909a73e2061897828460d52f
 | 
			
		||||
RUNC_COMMIT=ac031b5bf1cc92239461125f4c1ffb760522bbf2
 | 
			
		||||
CONTAINERD_COMMIT=52ef1ceb4b660c42cf4ea9013180a5663968d4c7
 | 
			
		||||
GRIMES_COMMIT=fe069a03affd2547fdb05e5b8b07202d2e41735b
 | 
			
		||||
LIBNETWORK_COMMIT=0f534354b813003a754606689722fe253101bc4e
 | 
			
		||||
| 
						 | 
				
			
			@ -20,11 +20,12 @@ else
 | 
			
		|||
	export GOPATH="$TMP_GOPATH"
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
# Do not build with ambient capabilities support
 | 
			
		||||
RUNC_BUILDTAGS="${RUNC_BUILDTAGS:-"seccomp apparmor selinux"}"
 | 
			
		||||
 | 
			
		||||
install_runc() {
 | 
			
		||||
	echo "Install runc version $RUNC_COMMIT"
 | 
			
		||||
	git clone https://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc"
 | 
			
		||||
	git clone https://github.com/docker/runc.git "$GOPATH/src/github.com/opencontainers/runc"
 | 
			
		||||
	cd "$GOPATH/src/github.com/opencontainers/runc"
 | 
			
		||||
	git checkout -q "$RUNC_COMMIT"
 | 
			
		||||
	make BUILDTAGS="$RUNC_BUILDTAGS" $1
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1155,24 +1155,185 @@ func (s *DockerSuite) TestRunNoNewPrivSetuid(c *check.C) {
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *DockerSuite) TestRunAmbientCapabilities(c *check.C) {
 | 
			
		||||
	testRequires(c, DaemonIsLinux, ambientCapabilities)
 | 
			
		||||
func (s *DockerSuite) TestUserNoEffectiveCapabilitiesChown(c *check.C) {
 | 
			
		||||
	testRequires(c, DaemonIsLinux)
 | 
			
		||||
	ensureSyscallTest(c)
 | 
			
		||||
 | 
			
		||||
	// test that a non root user can gain capabilities
 | 
			
		||||
	runCmd := exec.Command(dockerBinary, "run", "--user", "1000", "--cap-add", "chown", "busybox", "chown", "100", "/tmp")
 | 
			
		||||
	// test that a root user has default capability CAP_CHOWN
 | 
			
		||||
	runCmd := exec.Command(dockerBinary, "run", "busybox", "chown", "100", "/tmp")
 | 
			
		||||
	_, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, check.IsNil)
 | 
			
		||||
	// test that non root user has default capabilities
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--user", "1000", "busybox", "chown", "100", "/tmp")
 | 
			
		||||
	_, _, err = runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, check.IsNil)
 | 
			
		||||
	// test this fails without cap_chown
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--user", "1000", "--cap-drop", "chown", "busybox", "chown", "100", "/tmp")
 | 
			
		||||
	// test that non root user does not have default capability CAP_CHOWN
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--user", "1000:1000", "busybox", "chown", "100", "/tmp")
 | 
			
		||||
	out, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(strings.TrimSpace(out), checker.Equals, "chown: /tmp: Operation not permitted")
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
	// test that root user can drop default capability CAP_CHOWN
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--cap-drop", "chown", "busybox", "chown", "100", "/tmp")
 | 
			
		||||
	out, _, err = runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *DockerSuite) TestUserNoEffectiveCapabilitiesDacOverride(c *check.C) {
 | 
			
		||||
	testRequires(c, DaemonIsLinux)
 | 
			
		||||
	ensureSyscallTest(c)
 | 
			
		||||
 | 
			
		||||
	// test that a root user has default capability CAP_DAC_OVERRIDE
 | 
			
		||||
	runCmd := exec.Command(dockerBinary, "run", "busybox", "sh", "-c", "echo test > /etc/passwd")
 | 
			
		||||
	_, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, check.IsNil)
 | 
			
		||||
	// test that non root user does not have default capability CAP_DAC_OVERRIDE
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--user", "1000:1000", "busybox", "sh", "-c", "echo test > /etc/passwd")
 | 
			
		||||
	out, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Permission denied")
 | 
			
		||||
	// TODO test that root user can drop default capability CAP_DAC_OVERRIDE
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *DockerSuite) TestUserNoEffectiveCapabilitiesFowner(c *check.C) {
 | 
			
		||||
	testRequires(c, DaemonIsLinux)
 | 
			
		||||
	ensureSyscallTest(c)
 | 
			
		||||
 | 
			
		||||
	// test that a root user has default capability CAP_FOWNER
 | 
			
		||||
	runCmd := exec.Command(dockerBinary, "run", "busybox", "chmod", "777", "/etc/passwd")
 | 
			
		||||
	_, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, check.IsNil)
 | 
			
		||||
	// test that non root user does not have default capability CAP_FOWNER
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--user", "1000:1000", "busybox", "chmod", "777", "/etc/passwd")
 | 
			
		||||
	out, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
	// TODO test that root user can drop default capability CAP_FOWNER
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// TODO CAP_KILL
 | 
			
		||||
 | 
			
		||||
func (s *DockerSuite) TestUserNoEffectiveCapabilitiesSetuid(c *check.C) {
 | 
			
		||||
	testRequires(c, DaemonIsLinux)
 | 
			
		||||
	ensureSyscallTest(c)
 | 
			
		||||
 | 
			
		||||
	// test that a root user has default capability CAP_SETUID
 | 
			
		||||
	runCmd := exec.Command(dockerBinary, "run", "syscall-test", "setuid-test")
 | 
			
		||||
	_, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, check.IsNil)
 | 
			
		||||
	// test that non root user does not have default capability CAP_SETUID
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--user", "1000:1000", "syscall-test", "setuid-test")
 | 
			
		||||
	out, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
	// test that root user can drop default capability CAP_SETUID
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--cap-drop", "setuid", "syscall-test", "setuid-test")
 | 
			
		||||
	out, _, err = runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *DockerSuite) TestUserNoEffectiveCapabilitiesSetgid(c *check.C) {
 | 
			
		||||
	testRequires(c, DaemonIsLinux)
 | 
			
		||||
	ensureSyscallTest(c)
 | 
			
		||||
 | 
			
		||||
	// test that a root user has default capability CAP_SETGID
 | 
			
		||||
	runCmd := exec.Command(dockerBinary, "run", "syscall-test", "setgid-test")
 | 
			
		||||
	_, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, check.IsNil)
 | 
			
		||||
	// test that non root user does not have default capability CAP_SETGID
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--user", "1000:1000", "syscall-test", "setgid-test")
 | 
			
		||||
	out, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
	// test that root user can drop default capability CAP_SETGID
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--cap-drop", "setgid", "syscall-test", "setgid-test")
 | 
			
		||||
	out, _, err = runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// TODO CAP_SETPCAP
 | 
			
		||||
 | 
			
		||||
func (s *DockerSuite) TestUserNoEffectiveCapabilitiesNetBindService(c *check.C) {
 | 
			
		||||
	testRequires(c, DaemonIsLinux)
 | 
			
		||||
	ensureSyscallTest(c)
 | 
			
		||||
 | 
			
		||||
	// test that a root user has default capability CAP_NET_BIND_SERVICE
 | 
			
		||||
	runCmd := exec.Command(dockerBinary, "run", "syscall-test", "socket-test")
 | 
			
		||||
	_, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, check.IsNil)
 | 
			
		||||
	// test that non root user does not have default capability CAP_NET_BIND_SERVICE
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--user", "1000:1000", "syscall-test", "socket-test")
 | 
			
		||||
	out, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Permission denied")
 | 
			
		||||
	// test that root user can drop default capability CAP_NET_BIND_SERVICE
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--cap-drop", "net_bind_service", "syscall-test", "socket-test")
 | 
			
		||||
	out, _, err = runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Permission denied")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *DockerSuite) TestUserNoEffectiveCapabilitiesNetRaw(c *check.C) {
 | 
			
		||||
	testRequires(c, DaemonIsLinux)
 | 
			
		||||
	ensureSyscallTest(c)
 | 
			
		||||
 | 
			
		||||
	// test that a root user has default capability CAP_NET_RAW
 | 
			
		||||
	runCmd := exec.Command(dockerBinary, "run", "syscall-test", "raw-test")
 | 
			
		||||
	_, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, check.IsNil)
 | 
			
		||||
	// test that non root user does not have default capability CAP_NET_RAW
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--user", "1000:1000", "syscall-test", "raw-test")
 | 
			
		||||
	out, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
	// test that root user can drop default capability CAP_NET_RAW
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--cap-drop", "net_raw", "syscall-test", "raw-test")
 | 
			
		||||
	out, _, err = runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *DockerSuite) TestUserNoEffectiveCapabilitiesChroot(c *check.C) {
 | 
			
		||||
	testRequires(c, DaemonIsLinux)
 | 
			
		||||
	ensureSyscallTest(c)
 | 
			
		||||
 | 
			
		||||
	// test that a root user has default capability CAP_SYS_CHROOT
 | 
			
		||||
	runCmd := exec.Command(dockerBinary, "run", "busybox", "chroot", "/", "/bin/true")
 | 
			
		||||
	_, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, check.IsNil)
 | 
			
		||||
	// test that non root user does not have default capability CAP_SYS_CHROOT
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--user", "1000:1000", "busybox", "chroot", "/", "/bin/true")
 | 
			
		||||
	out, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
	// test that root user can drop default capability CAP_SYS_CHROOT
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--cap-drop", "sys_chroot", "busybox", "chroot", "/", "/bin/true")
 | 
			
		||||
	out, _, err = runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *DockerSuite) TestUserNoEffectiveCapabilitiesMknod(c *check.C) {
 | 
			
		||||
	testRequires(c, DaemonIsLinux)
 | 
			
		||||
	ensureSyscallTest(c)
 | 
			
		||||
 | 
			
		||||
	// test that a root user has default capability CAP_MKNOD
 | 
			
		||||
	runCmd := exec.Command(dockerBinary, "run", "busybox", "mknod", "/tmp/node", "b", "1", "2")
 | 
			
		||||
	_, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, check.IsNil)
 | 
			
		||||
	// test that non root user does not have default capability CAP_MKNOD
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--user", "1000:1000", "busybox", "mknod", "/tmp/node", "b", "1", "2")
 | 
			
		||||
	out, _, err := runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
	// test that root user can drop default capability CAP_MKNOD
 | 
			
		||||
	runCmd = exec.Command(dockerBinary, "run", "--cap-drop", "mknod", "busybox", "mknod", "/tmp/node", "b", "1", "2")
 | 
			
		||||
	out, _, err = runCommandWithOutput(runCmd)
 | 
			
		||||
	c.Assert(err, checker.NotNil, check.Commentf(out))
 | 
			
		||||
	c.Assert(out, checker.Contains, "Operation not permitted")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// TODO CAP_AUDIT_WRITE
 | 
			
		||||
// TODO CAP_SETFCAP
 | 
			
		||||
 | 
			
		||||
func (s *DockerSuite) TestRunApparmorProcDirectory(c *check.C) {
 | 
			
		||||
	testRequires(c, SameHostDaemon, Apparmor)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +1,7 @@
 | 
			
		|||
package main
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"io/ioutil"
 | 
			
		||||
	"os"
 | 
			
		||||
	"os/exec"
 | 
			
		||||
| 
						 | 
				
			
			@ -53,15 +54,14 @@ func ensureSyscallTest(c *check.C) {
 | 
			
		|||
	gcc, err := exec.LookPath("gcc")
 | 
			
		||||
	c.Assert(err, checker.IsNil, check.Commentf("could not find gcc"))
 | 
			
		||||
 | 
			
		||||
	out, err := exec.Command(gcc, "-g", "-Wall", "-static", "../contrib/syscall-test/userns.c", "-o", tmp+"/"+"userns-test").CombinedOutput()
 | 
			
		||||
	c.Assert(err, checker.IsNil, check.Commentf(string(out)))
 | 
			
		||||
	out, err = exec.Command(gcc, "-g", "-Wall", "-static", "../contrib/syscall-test/ns.c", "-o", tmp+"/"+"ns-test").CombinedOutput()
 | 
			
		||||
	c.Assert(err, checker.IsNil, check.Commentf(string(out)))
 | 
			
		||||
	out, err = exec.Command(gcc, "-g", "-Wall", "-static", "../contrib/syscall-test/acct.c", "-o", tmp+"/"+"acct-test").CombinedOutput()
 | 
			
		||||
	c.Assert(err, checker.IsNil, check.Commentf(string(out)))
 | 
			
		||||
	tests := []string{"userns", "ns", "acct", "setuid", "setgid", "socket", "raw"}
 | 
			
		||||
	for _, test := range tests {
 | 
			
		||||
		out, err := exec.Command(gcc, "-g", "-Wall", "-static", fmt.Sprintf("../contrib/syscall-test/%s.c", test), "-o", fmt.Sprintf("%s/%s-test", tmp, test)).CombinedOutput()
 | 
			
		||||
		c.Assert(err, checker.IsNil, check.Commentf(string(out)))
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if runtime.GOOS == "linux" && runtime.GOARCH == "amd64" {
 | 
			
		||||
		out, err = exec.Command(gcc, "-s", "-m32", "-nostdlib", "../contrib/syscall-test/exit32.s", "-o", tmp+"/"+"exit32-test").CombinedOutput()
 | 
			
		||||
		out, err := exec.Command(gcc, "-s", "-m32", "-nostdlib", "../contrib/syscall-test/exit32.s", "-o", tmp+"/"+"exit32-test").CombinedOutput()
 | 
			
		||||
		c.Assert(err, checker.IsNil, check.Commentf(string(out)))
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -59,7 +59,7 @@ github.com/miekg/pkcs11 df8ae6ca730422dba20c768ff38ef7d79077a59f
 | 
			
		|||
github.com/docker/go v1.5.1-1-1-gbaf439e
 | 
			
		||||
github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
 | 
			
		||||
 | 
			
		||||
github.com/opencontainers/runc 02f8fa7863dd3f82909a73e2061897828460d52f # libcontainer
 | 
			
		||||
github.com/opencontainers/runc ac031b5bf1cc92239461125f4c1ffb760522bbf2 # libcontainer
 | 
			
		||||
github.com/opencontainers/runtime-spec 1c7c27d043c2a5e513a44084d2b10d77d1402b8c # specs
 | 
			
		||||
github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0
 | 
			
		||||
# libcontainer deps (see src/github.com/opencontainers/runc/Godeps/Godeps.json)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										4
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unix.go
									
										
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unix.go
									
										
									
										generated
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -22,7 +22,7 @@ type Cgroup struct {
 | 
			
		|||
	// The path is assumed to be relative to the host system cgroup mountpoint.
 | 
			
		||||
	Path string `json:"path"`
 | 
			
		||||
 | 
			
		||||
	// ScopePrefix decribes prefix for the scope name
 | 
			
		||||
	// ScopePrefix describes prefix for the scope name
 | 
			
		||||
	ScopePrefix string `json:"scope_prefix"`
 | 
			
		||||
 | 
			
		||||
	// Paths represent the absolute cgroups paths to join.
 | 
			
		||||
| 
						 | 
				
			
			@ -95,7 +95,7 @@ type Resources struct {
 | 
			
		|||
	// IO read rate limit per cgroup per device, bytes per second.
 | 
			
		||||
	BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
 | 
			
		||||
 | 
			
		||||
	// IO write rate limit per cgroup per divice, bytes per second.
 | 
			
		||||
	// IO write rate limit per cgroup per device, bytes per second.
 | 
			
		||||
	BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
 | 
			
		||||
 | 
			
		||||
	// IO read rate limit per cgroup per device, IO per second.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										5
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
									
										
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										5
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
									
										
									
										generated
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -85,11 +85,6 @@ type Config struct {
 | 
			
		|||
	// that the parent process dies.
 | 
			
		||||
	ParentDeathSignal int `json:"parent_death_signal"`
 | 
			
		||||
 | 
			
		||||
	// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
 | 
			
		||||
	// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
 | 
			
		||||
	// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
 | 
			
		||||
	PivotDir string `json:"pivot_dir"`
 | 
			
		||||
 | 
			
		||||
	// Path to a directory containing the container's root filesystem.
 | 
			
		||||
	Rootfs string `json:"rootfs"`
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										9
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
									
										
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
									
										
									
										generated
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -1,5 +1,11 @@
 | 
			
		|||
package configs
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	// EXT_COPYUP is a directive to copy up the contents of a directory when
 | 
			
		||||
	// a tmpfs is mounted over it.
 | 
			
		||||
	EXT_COPYUP = 1 << iota
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type Mount struct {
 | 
			
		||||
	// Source path for the mount.
 | 
			
		||||
	Source string `json:"source"`
 | 
			
		||||
| 
						 | 
				
			
			@ -22,6 +28,9 @@ type Mount struct {
 | 
			
		|||
	// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
 | 
			
		||||
	Relabel string `json:"relabel"`
 | 
			
		||||
 | 
			
		||||
	// Extensions are additional flags that are specific to runc.
 | 
			
		||||
	Extensions int `json:"extensions"`
 | 
			
		||||
 | 
			
		||||
	// Optional Command to be run before Source is mounted.
 | 
			
		||||
	PremountCmds []Command `json:"premount_cmds"`
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										8
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unix.go
									
										
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unix.go
									
										
									
										generated
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -22,8 +22,8 @@ var (
 | 
			
		|||
	supportedNamespaces = make(map[NamespaceType]bool)
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// nsToFile converts the namespace type to its filename
 | 
			
		||||
func nsToFile(ns NamespaceType) string {
 | 
			
		||||
// NsName converts the namespace type to its filename
 | 
			
		||||
func NsName(ns NamespaceType) string {
 | 
			
		||||
	switch ns {
 | 
			
		||||
	case NEWNET:
 | 
			
		||||
		return "net"
 | 
			
		||||
| 
						 | 
				
			
			@ -50,7 +50,7 @@ func IsNamespaceSupported(ns NamespaceType) bool {
 | 
			
		|||
	if ok {
 | 
			
		||||
		return supported
 | 
			
		||||
	}
 | 
			
		||||
	nsFile := nsToFile(ns)
 | 
			
		||||
	nsFile := NsName(ns)
 | 
			
		||||
	// if the namespace type is unknown, just return false
 | 
			
		||||
	if nsFile == "" {
 | 
			
		||||
		return false
 | 
			
		||||
| 
						 | 
				
			
			@ -84,7 +84,7 @@ func (n *Namespace) GetPath(pid int) string {
 | 
			
		|||
	if n.Path != "" {
 | 
			
		||||
		return n.Path
 | 
			
		||||
	}
 | 
			
		||||
	return fmt.Sprintf("/proc/%d/ns/%s", pid, nsToFile(n.Type))
 | 
			
		||||
	return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (n *Namespaces) Remove(t NamespaceType) bool {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										4
									
								
								vendor/github.com/opencontainers/runc/libcontainer/label/label.go
									
										
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								vendor/github.com/opencontainers/runc/libcontainer/label/label.go
									
										
									
										generated
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -9,6 +9,10 @@ func InitLabels(options []string) (string, string, error) {
 | 
			
		|||
	return "", "", nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func GetROMountLabel() string {
 | 
			
		||||
	return ""
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func GenLabels(options string) (string, string, error) {
 | 
			
		||||
	return "", "", nil
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										22
									
								
								vendor/github.com/opencontainers/runc/libcontainer/label/label_selinux.go
									
										
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										22
									
								
								vendor/github.com/opencontainers/runc/libcontainer/label/label_selinux.go
									
										
									
										generated
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -33,15 +33,19 @@ func InitLabels(options []string) (string, string, error) {
 | 
			
		|||
		pcon := selinux.NewContext(processLabel)
 | 
			
		||||
		mcon := selinux.NewContext(mountLabel)
 | 
			
		||||
		for _, opt := range options {
 | 
			
		||||
			if opt == "disable" {
 | 
			
		||||
			val := strings.SplitN(opt, "=", 2)
 | 
			
		||||
			if val[0] != "label" {
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
			if len(val) < 2 {
 | 
			
		||||
				return "", "", fmt.Errorf("bad label option %q, valid options 'disable' or \n'user, role, level, type' followed by ':' and a value", opt)
 | 
			
		||||
			}
 | 
			
		||||
			if val[1] == "disable" {
 | 
			
		||||
				return "", "", nil
 | 
			
		||||
			}
 | 
			
		||||
			if i := strings.Index(opt, ":"); i == -1 {
 | 
			
		||||
				return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type' followed by ':' and a value", opt)
 | 
			
		||||
			}
 | 
			
		||||
			con := strings.SplitN(opt, ":", 2)
 | 
			
		||||
			if !validOptions[con[0]] {
 | 
			
		||||
				return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type'", con[0])
 | 
			
		||||
			con := strings.SplitN(val[1], ":", 2)
 | 
			
		||||
			if len(con) < 2 || !validOptions[con[0]] {
 | 
			
		||||
				return "", "", fmt.Errorf("bad label option %q, valid options 'disable, user, role, level, type'", con[0])
 | 
			
		||||
 | 
			
		||||
			}
 | 
			
		||||
			pcon[con[0]] = con[1]
 | 
			
		||||
| 
						 | 
				
			
			@ -55,6 +59,10 @@ func InitLabels(options []string) (string, string, error) {
 | 
			
		|||
	return processLabel, mountLabel, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func GetROMountLabel() string {
 | 
			
		||||
	return selinux.GetROFileLabel()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// DEPRECATED: The GenLabels function is only to be used during the transition to the official API.
 | 
			
		||||
func GenLabels(options string) (string, string, error) {
 | 
			
		||||
	return InitLabels(strings.Fields(options))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										32
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,32 @@
 | 
			
		|||
#if !defined(NSENTER_NAMESPACE_H)
#define NSENTER_NAMESPACE_H

/* Define _GNU_SOURCE (unless the includer already did) before <sched.h>. */
#if !defined(_GNU_SOURCE)
#	define _GNU_SOURCE
#endif
#include <sched.h>

/*
 * Fallbacks for any CLONE_NEW* flag that <sched.h> did not provide.
 * The values are taken from include/uapi/linux/sched.h.
 */
#if !defined(CLONE_NEWNS)
#	define CLONE_NEWNS 0x00020000 /* New mount namespace group */
#endif
#if !defined(CLONE_NEWCGROUP)
#	define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
#endif
#if !defined(CLONE_NEWUTS)
#	define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
#endif
#if !defined(CLONE_NEWIPC)
#	define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
#endif
#if !defined(CLONE_NEWUSER)
#	define CLONE_NEWUSER 0x10000000 /* New user namespace */
#endif
#if !defined(CLONE_NEWPID)
#	define CLONE_NEWPID 0x20000000 /* New pid namespace */
#endif
#if !defined(CLONE_NEWNET)
#	define CLONE_NEWNET 0x40000000 /* New network namespace */
#endif

#endif /* NSENTER_NAMESPACE_H */
 | 
			
		||||
							
								
								
									
										12
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,12 @@
 | 
			
		|||
// +build linux,!gccgo
 | 
			
		||||
 | 
			
		||||
package nsenter
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
#cgo CFLAGS: -Wall
 | 
			
		||||
extern void nsexec();
 | 
			
		||||
void __attribute__((constructor)) init(void) {
 | 
			
		||||
	nsexec();
 | 
			
		||||
}
 | 
			
		||||
*/
 | 
			
		||||
import "C"
 | 
			
		||||
							
								
								
									
										25
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,25 @@
 | 
			
		|||
// +build linux,gccgo
 | 
			
		||||
 | 
			
		||||
package nsenter
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
#cgo CFLAGS: -Wall
 | 
			
		||||
extern void nsexec();
 | 
			
		||||
void __attribute__((constructor)) init(void) {
 | 
			
		||||
	nsexec();
 | 
			
		||||
}
 | 
			
		||||
*/
 | 
			
		||||
import "C"
 | 
			
		||||
 | 
			
		||||
// AlwaysFalse is never assigned in this file, so it keeps its zero value
 | 
			
		||||
// (false). It is exported so the compiler doesn't optimize out its reference.
 | 
			
		||||
var AlwaysFalse bool
 | 
			
		||||
 | 
			
		||||
// init references the C init() constructor declared in the cgo preamble behind
// the AlwaysFalse guard, so the call is compiled in but is not executed while
// AlwaysFalse stays false.
func init() {
 | 
			
		||||
	if AlwaysFalse {
 | 
			
		||||
		// by referencing this C init() in a noop test, it will ensure the compiler
 | 
			
		||||
		// links in the C function.
 | 
			
		||||
		// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134
 | 
			
		||||
		C.init()
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										5
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,5 @@
 | 
			
		|||
// +build !linux !cgo
 | 
			
		||||
 | 
			
		||||
package nsenter
 | 
			
		||||
 | 
			
		||||
import "C"
 | 
			
		||||
							
								
								
									
										753
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										753
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,753 @@
 | 
			
		|||
#define _GNU_SOURCE
 | 
			
		||||
#include <endian.h>
 | 
			
		||||
#include <errno.h>
 | 
			
		||||
#include <fcntl.h>
 | 
			
		||||
#include <grp.h>
 | 
			
		||||
#include <sched.h>
 | 
			
		||||
#include <setjmp.h>
 | 
			
		||||
#include <signal.h>
 | 
			
		||||
#include <stdarg.h>
 | 
			
		||||
#include <stdbool.h>
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <stdbool.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
 | 
			
		||||
#include <sys/ioctl.h>
 | 
			
		||||
#include <sys/prctl.h>
 | 
			
		||||
#include <sys/socket.h>
 | 
			
		||||
#include <sys/types.h>
 | 
			
		||||
 | 
			
		||||
#include <linux/limits.h>
 | 
			
		||||
#include <linux/netlink.h>
 | 
			
		||||
#include <linux/types.h>
 | 
			
		||||
 | 
			
		||||
/* Get all of the CLONE_NEW* flags. */
 | 
			
		||||
#include "namespace.h"
 | 
			
		||||
 | 
			
		||||
/*
 * Messages exchanged over the sync socket between the nsexec stages.
 * The *_PLS values are requests sent to the parent; the *_ACK values are
 * the parent's replies.
 */
enum sync_t {
	/* Ask the parent to map our users / the parent finished the mapping. */
	SYNC_USERMAP_PLS = 0x40,
	SYNC_USERMAP_ACK = 0x41,

	/* Tell the parent a PID follows / the parent received the PID. */
	SYNC_RECVPID_PLS = 0x42,
	SYNC_RECVPID_ACK = 0x43,

	/*
	 * Fatal error, no turning back. The error code follows.
	 * XXX: This doesn't help with segfaults and other such issues.
	 */
	SYNC_ERR = 0xFF,
};
 | 
			
		||||
 | 
			
		||||
/*
 * Values passed to longjmp() to pick the stage that runs after clone().
 * JUMP_PARENT is 0, the value setjmp() yields on its direct return.
 */
#define JUMP_PARENT 0x00
#define JUMP_CHILD  0xA0
#define JUMP_INIT   0xA1

/* Size in bytes of the buffer used for JSON messages. */
#define JSON_MAX 4096
 | 
			
		||||
 | 
			
		||||
/*
 * Argument block handed to clone(). The child's stack pointer is stack_ptr,
 * the end of stack[]: we assume the stack grows down, so the remaining
 * members sit above it.
 */
struct clone_t {
	/* Space reserved for clone() to place arguments and the retcode. */
	char stack[4096] __attribute__ ((aligned(16)));
	/* Zero-length marker for the address just past stack[]. */
	char stack_ptr[0];

	/* There are two children; these select which code the new one runs. */
	jmp_buf *env;
	int jmpval;
};
 | 
			
		||||
 | 
			
		||||
/*
 * Bootstrap configuration decoded from the netlink payload. The map and
 * namespace pointers point into the buffer held in data.
 */
struct nlconfig_t {
	/* Raw payload buffer; released by nl_free(). */
	char *data;
	/* Value of CLONE_FLAGS_ATTR. */
	uint32_t cloneflags;
	/* UIDMAP_ATTR payload and its length. */
	char *uidmap;
	size_t uidmap_len;
	/* GIDMAP_ATTR payload and its length. */
	char *gidmap;
	size_t gidmap_len;
	/* NS_PATHS_ATTR payload and its length. */
	char *namespaces;
	size_t namespaces_len;
	/* Value of SETGROUP_ATTR. */
	uint8_t is_setgroup;
	/* Descriptor opened from CONSOLE_PATH_ATTR, or -1 if none was given. */
	int consolefd;
};
 | 
			
		||||
 | 
			
		||||
/*
 * Netlink message type (INIT_MSG) and attribute types sent to us as part of
 * bootstrapping the init. These constants are defined in
 * libcontainer/message_linux.go and must match it.
 */
#define INIT_MSG		62000
#define CLONE_FLAGS_ATTR	27281
#define CONSOLE_PATH_ATTR	27282
#define NS_PATHS_ATTR		27283
#define UIDMAP_ATTR		27284
#define GIDMAP_ATTR		27285
#define SETGROUP_ATTR		27286
 | 
			
		||||
 | 
			
		||||
/*
 * Versions of glibc older than 2.14 (namely glibc 2.12) don't include a
 * setns() function, so provide one there on top of the raw syscall.
 */
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
#	define _GNU_SOURCE
#	include "syscall.h"
#	if defined(__NR_setns) && !defined(SYS_setns)
#		define SYS_setns __NR_setns
#	endif

#if !defined(SYS_setns)
#	error "setns(2) syscall not supported by glibc version"
#endif

/* Thin wrapper that forwards both arguments to the setns syscall. */
int setns(int fd, int nstype)
{
	return syscall(SYS_setns, fd, nstype);
}
#endif
 | 
			
		||||
 | 
			
		||||
/* XXX: This is ugly. */
/* Descriptor bail() reports fatal errors on; negative means "none". */
static int syncfd = -1;

/*
 * bail(fmt, ...) reports a fatal error and exits the process.
 *
 * It prints "nsenter: <message>: %m" to stderr (%m expands to the errno
 * message in glibc's printf). If syncfd is non-negative it then writes
 * SYNC_ERR followed by the exit code to it, so the peer can mirror the code.
 * The exit code is __COUNTER__ + 1, so it differs per expansion site.
 *
 * Note that `syncfd` is looked up at the expansion site, so a local variable
 * of that name takes precedence over the file-scope one.
 *
 * The macro's own temporaries use bail_-prefixed names so that arguments
 * such as `ret` or `s` passed by a caller refer to the caller's variables
 * rather than to the macro's.
 *
 * TODO(cyphar): Fix this so it correctly deals with syncT.
 */
#define bail(fmt, ...)								\
	do {									\
		int bail_ret_ = __COUNTER__ + 1;				\
		fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__);	\
		if (syncfd >= 0) {						\
			enum sync_t bail_sync_ = SYNC_ERR;			\
			if (write(syncfd, &bail_sync_, sizeof(bail_sync_))	\
			    != sizeof(bail_sync_))				\
				fprintf(stderr, "nsenter: failed: write(s)");	\
			if (write(syncfd, &bail_ret_, sizeof(bail_ret_))	\
			    != sizeof(bail_ret_))				\
				fprintf(stderr, "nsenter: failed: write(ret)");	\
		}								\
		exit(bail_ret_);						\
	} while(0)
 | 
			
		||||
 | 
			
		||||
static int write_file(char *data, size_t data_len, char *pathfmt, ...)
 | 
			
		||||
{
 | 
			
		||||
	int fd, len, ret = 0;
 | 
			
		||||
	char path[PATH_MAX];
 | 
			
		||||
 | 
			
		||||
	va_list ap;
 | 
			
		||||
	va_start(ap, pathfmt);
 | 
			
		||||
	len = vsnprintf(path, PATH_MAX, pathfmt, ap);
 | 
			
		||||
	va_end(ap);
 | 
			
		||||
	if (len < 0)
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	fd = open(path, O_RDWR);
 | 
			
		||||
	if (fd < 0) {
 | 
			
		||||
		ret = -1;
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	len = write(fd, data, data_len);
 | 
			
		||||
	if (len != data_len) {
 | 
			
		||||
		ret = -1;
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	close(fd);
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* setgroups policy selector passed to update_setgroups(). */
enum policy_t {
	SETGROUPS_DEFAULT = 0,	/* leave the policy alone */
	SETGROUPS_ALLOW   = 1,	/* write "allow" */
	SETGROUPS_DENY    = 2,	/* write "deny" */
};
 | 
			
		||||
 | 
			
		||||
/* This *must* be called before we touch gid_map. */
 | 
			
		||||
static void update_setgroups(int pid, enum policy_t setgroup)
 | 
			
		||||
{
 | 
			
		||||
	char *policy;
 | 
			
		||||
 | 
			
		||||
	switch (setgroup) {
 | 
			
		||||
		case SETGROUPS_ALLOW:
 | 
			
		||||
			policy = "allow";
 | 
			
		||||
			break;
 | 
			
		||||
		case SETGROUPS_DENY:
 | 
			
		||||
			policy = "deny";
 | 
			
		||||
			break;
 | 
			
		||||
		case SETGROUPS_DEFAULT:
 | 
			
		||||
			/* Nothing to do. */
 | 
			
		||||
			return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (write_file(policy, strlen(policy), "/proc/%d/setgroups", pid) < 0) {
 | 
			
		||||
		/*
 | 
			
		||||
		 * If the kernel is too old to support /proc/pid/setgroups,
 | 
			
		||||
		 * open(2) or write(2) will return ENOENT. This is fine.
 | 
			
		||||
		 */
 | 
			
		||||
		if (errno != ENOENT)
 | 
			
		||||
			bail("failed to write '%s' to /proc/%d/setgroups", policy, pid);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Writes map_len bytes of map to /proc/<pid>/uid_map and exits through
 * bail() if that fails. A NULL map or a non-positive length is a no-op.
 */
static void update_uidmap(int pid, char *map, int map_len)
{
	int have_map = (map != NULL) && (map_len > 0);

	if (have_map && write_file(map, map_len, "/proc/%d/uid_map", pid) < 0)
		bail("failed to update /proc/%d/uid_map", pid);
}
 | 
			
		||||
 | 
			
		||||
/*
 * Writes map_len bytes of map to /proc/<pid>/gid_map and exits through
 * bail() if that fails. A NULL map or a non-positive length is a no-op.
 */
static void update_gidmap(int pid, char *map, int map_len)
{
	int have_map = (map != NULL) && (map_len > 0);

	if (have_map && write_file(map, map_len, "/proc/%d/gid_map", pid) < 0)
		bail("failed to update /proc/%d/gid_map", pid);
}
 | 
			
		||||
 | 
			
		||||
/* A dummy function that just jumps to the given jumpval. */
 | 
			
		||||
static int child_func(void *arg) __attribute__ ((noinline));
 | 
			
		||||
static int child_func(void *arg)
 | 
			
		||||
{
 | 
			
		||||
	struct clone_t *ca = (struct clone_t *)arg;
 | 
			
		||||
	longjmp(*ca->env, ca->jmpval);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline));
 | 
			
		||||
static int clone_parent(jmp_buf *env, int jmpval)
 | 
			
		||||
{
 | 
			
		||||
	struct clone_t ca = {
 | 
			
		||||
		.env    = env,
 | 
			
		||||
		.jmpval = jmpval,
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Gets the init pipe fd from the environment (_LIBCONTAINER_INITPIPE), which
 * is used to read the bootstrap data and tell the parent what the new pid is
 * after we finish setting up the environment.
 *
 * Returns -1 if the variable is unset or empty. Exits through bail() if the
 * value is not a plain decimal number or does not fit in an int; the latter
 * was previously narrowed silently, yielding an unrelated descriptor number.
 */
static int initpipe(void)
{
	long val;
	int pipenum;
	char *initpipe, *endptr;

	initpipe = getenv("_LIBCONTAINER_INITPIPE");
	if (initpipe == NULL || *initpipe == '\0')
		return -1;

	/* strtol() reports overflow only through errno, so clear it first. */
	errno = 0;
	val = strtol(initpipe, &endptr, 10);
	if (*endptr != '\0')
		bail("unable to parse _LIBCONTAINER_INITPIPE");

	pipenum = (int)val;
	if (errno == ERANGE || (long)pipenum != val) {
		errno = ERANGE;
		bail("_LIBCONTAINER_INITPIPE is out of range");
	}

	return pipenum;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Maps a namespace name ("cgroup", "ipc", "mnt", "net", "pid", "user",
 * "uts") to its clone(2) flag. Names that are not recognised map to 0.
 */
static int nsflag(char *name)
{
	static const struct {
		const char *name;
		int flag;
	} known[] = {
		{"cgroup", CLONE_NEWCGROUP},
		{"ipc",    CLONE_NEWIPC},
		{"mnt",    CLONE_NEWNS},
		{"net",    CLONE_NEWNET},
		{"pid",    CLONE_NEWPID},
		{"user",   CLONE_NEWUSER},
		{"uts",    CLONE_NEWUTS},
	};
	size_t idx;

	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
		if (strcmp(name, known[idx].name) == 0)
			return known[idx].flag;
	}

	/* If we don't recognise a name, fallback to 0. */
	return 0;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Reads a uint32_t in host byte order from the first four bytes at buf.
 * The bytes are copied out with memcpy() so that buf needs no particular
 * alignment and no char buffer is accessed through a uint32_t lvalue.
 */
static uint32_t readint32(char *buf)
{
	uint32_t value;

	memcpy(&value, buf, sizeof(value));
	return value;
}
 | 
			
		||||
 | 
			
		||||
/* Returns the first byte at buf as an unsigned 8-bit value. */
static uint8_t readint8(char *buf)
{
	return (uint8_t) buf[0];
}
 | 
			
		||||
 | 
			
		||||
static void nl_parse(int fd, struct nlconfig_t *config)
 | 
			
		||||
{
 | 
			
		||||
	size_t len, size;
 | 
			
		||||
	struct nlmsghdr hdr;
 | 
			
		||||
	char *data, *current;
 | 
			
		||||
 | 
			
		||||
	/* Retrieve the netlink header. */
 | 
			
		||||
	len = read(fd, &hdr, NLMSG_HDRLEN);
 | 
			
		||||
	if (len != NLMSG_HDRLEN)
 | 
			
		||||
		bail("invalid netlink header length %lu", len);
 | 
			
		||||
 | 
			
		||||
	if (hdr.nlmsg_type == NLMSG_ERROR)
 | 
			
		||||
		bail("failed to read netlink message");
 | 
			
		||||
 | 
			
		||||
	if (hdr.nlmsg_type != INIT_MSG)
 | 
			
		||||
		bail("unexpected msg type %d", hdr.nlmsg_type);
 | 
			
		||||
 | 
			
		||||
	/* Retrieve data. */
 | 
			
		||||
	size = NLMSG_PAYLOAD(&hdr, 0);
 | 
			
		||||
	current = data = malloc(size);
 | 
			
		||||
	if (!data)
 | 
			
		||||
		bail("failed to allocate %zu bytes of memory for nl_payload", size);
 | 
			
		||||
 | 
			
		||||
	len = read(fd, data, size);
 | 
			
		||||
	if (len != size)
 | 
			
		||||
		bail("failed to read netlink payload, %lu != %lu", len, size);
 | 
			
		||||
 | 
			
		||||
	/* Parse the netlink payload. */
 | 
			
		||||
	config->data = data;
 | 
			
		||||
	config->consolefd = -1;
 | 
			
		||||
	while (current < data + size) {
 | 
			
		||||
		struct nlattr *nlattr = (struct nlattr *)current;
 | 
			
		||||
		size_t payload_len = nlattr->nla_len - NLA_HDRLEN;
 | 
			
		||||
 | 
			
		||||
		/* Advance to payload. */
 | 
			
		||||
		current += NLA_HDRLEN;
 | 
			
		||||
 | 
			
		||||
		/* Handle payload. */
 | 
			
		||||
		switch (nlattr->nla_type) {
 | 
			
		||||
		case CLONE_FLAGS_ATTR:
 | 
			
		||||
			config->cloneflags = readint32(current);
 | 
			
		||||
			break;
 | 
			
		||||
		case CONSOLE_PATH_ATTR:
 | 
			
		||||
			/*
 | 
			
		||||
			 * We open the console here because we currently evaluate console
 | 
			
		||||
			 * paths from the *host* namespaces.
 | 
			
		||||
			 */
 | 
			
		||||
			config->consolefd = open(current, O_RDWR);
 | 
			
		||||
			if (config->consolefd < 0)
 | 
			
		||||
				bail("failed to open console %s", current);
 | 
			
		||||
			break;
 | 
			
		||||
		case NS_PATHS_ATTR:
 | 
			
		||||
			config->namespaces = current;
 | 
			
		||||
			config->namespaces_len = payload_len;
 | 
			
		||||
			break;
 | 
			
		||||
		case UIDMAP_ATTR:
 | 
			
		||||
			config->uidmap = current;
 | 
			
		||||
			config->uidmap_len = payload_len;
 | 
			
		||||
			break;
 | 
			
		||||
		case GIDMAP_ATTR:
 | 
			
		||||
			config->gidmap = current;
 | 
			
		||||
			config->gidmap_len = payload_len;
 | 
			
		||||
			break;
 | 
			
		||||
		case SETGROUP_ATTR:
 | 
			
		||||
			config->is_setgroup = readint8(current);
 | 
			
		||||
			break;
 | 
			
		||||
		default:
 | 
			
		||||
			bail("unknown netlink message type %d", nlattr->nla_type);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		current += NLA_ALIGN(payload_len);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void nl_free(struct nlconfig_t *config)
 | 
			
		||||
{
 | 
			
		||||
	free(config->data);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Joins every namespace listed in nslist, a comma-separated list of
 * "type:path" entries. nslist is modified in place while it is tokenised.
 *
 * All paths are opened first and setns(2) is then called on each descriptor
 * in the order given. Any failure (empty list, entry without ':', open or
 * setns error, allocation failure) exits through bail().
 */
void join_namespaces(char *nslist)
{
	int num = 0, i;
	char *saveptr = NULL;
	char *namespace = strtok_r(nslist, ",", &saveptr);
	struct namespace_t {
		int fd;
		int ns;
		char path[PATH_MAX];
	} *namespaces = NULL;

	if (!namespace || !strlen(namespace) || !strlen(nslist))
		bail("ns paths are empty");

	/*
	 * We have to open the file descriptors first, since after
	 * we join the mnt namespace we might no longer be able to
	 * access the paths.
	 */
	do {
		int fd;
		char *path;
		struct namespace_t *ns;

		/* Resize the namespace array. */
		namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t));
		if (!namespaces)
			bail("failed to reallocate namespace array");
		ns = &namespaces[num - 1];

		/* Split 'ns:path'. */
		path = strstr(namespace, ":");
		if (!path)
			bail("failed to parse %s", namespace);
		*path++ = '\0';

		/* Report the path: `namespace` now holds only the type name. */
		fd = open(path, O_RDONLY);
		if (fd < 0)
			bail("failed to open %s", path);

		ns->fd = fd;
		ns->ns = nsflag(namespace);
		/* strncpy() does not terminate on truncation; do it ourselves. */
		strncpy(ns->path, path, PATH_MAX - 1);
		ns->path[PATH_MAX - 1] = '\0';
	} while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL);

	/*
	 * The ordering in which we join namespaces is important. We should
	 * always join the user namespace *first*. This is all guaranteed
	 * from the container_linux.go side of this, so we're just going to
	 * follow the order given to us.
	 */

	for (i = 0; i < num; i++) {
		/* Use a pointer: copying the entry would copy PATH_MAX bytes. */
		struct namespace_t *ns = &namespaces[i];

		if (setns(ns->fd, ns->ns) < 0)
			bail("failed to setns to %s", ns->path);

		close(ns->fd);
	}

	free(namespaces);
}
 | 
			
		||||
 | 
			
		||||
void nsexec(void)
 | 
			
		||||
{
 | 
			
		||||
	int pipenum;
 | 
			
		||||
	jmp_buf env;
 | 
			
		||||
	int syncpipe[2];
 | 
			
		||||
	struct nlconfig_t config = {0};
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * If we don't have an init pipe, just return to the go routine.
 | 
			
		||||
	 * We'll only get an init pipe for start or exec.
 | 
			
		||||
	 */
 | 
			
		||||
	pipenum = initpipe();
 | 
			
		||||
	if (pipenum == -1)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	/* Parse all of the netlink configuration. */
 | 
			
		||||
	nl_parse(pipenum, &config);
 | 
			
		||||
 | 
			
		||||
	/* Pipe so we can tell the child when we've finished setting up. */
 | 
			
		||||
	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0)
 | 
			
		||||
		bail("failed to setup sync pipe between parent and child");
 | 
			
		||||
 | 
			
		||||
	/* TODO: Currently we aren't dealing with child deaths properly. */
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Okay, so this is quite annoying.
 | 
			
		||||
	 *
 | 
			
		||||
	 * In order for this unsharing code to be more extensible we need to split
 | 
			
		||||
	 * up unshare(CLONE_NEWUSER) and clone() in various ways. The ideal case
 | 
			
		||||
	 * would be if we did clone(CLONE_NEWUSER) and the other namespaces
 | 
			
		||||
	 * separately, but because of SELinux issues we cannot really do that. But
 | 
			
		||||
	 * we cannot just dump the namespace flags into clone(...) because several
 | 
			
		||||
	 * usecases (such as rootless containers) require more granularity around
 | 
			
		||||
	 * the namespace setup. In addition, some older kernels had issues where
 | 
			
		||||
	 * CLONE_NEWUSER wasn't handled before other namespaces (but we cannot
 | 
			
		||||
	 * handle this while also dealing with SELinux so we choose SELinux support
 | 
			
		||||
	 * over broken kernel support).
 | 
			
		||||
	 *
 | 
			
		||||
	 * However, if we unshare(2) the user namespace *before* we clone(2), then
 | 
			
		||||
	 * all hell breaks loose.
 | 
			
		||||
	 *
 | 
			
		||||
	 * The parent no longer has permissions to do many things (unshare(2) drops
 | 
			
		||||
	 * all capabilities in your old namespace), and the container cannot be set
 | 
			
		||||
	 * up to have more than one {uid,gid} mapping. This is obviously less than
 | 
			
		||||
	 * ideal. In order to fix this, we have to first clone(2) and then unshare.
 | 
			
		||||
	 *
 | 
			
		||||
	 * Unfortunately, it's not as simple as that. We have to fork to enter the
 | 
			
		||||
	 * PID namespace (the PID namespace only applies to children). Since we'll
 | 
			
		||||
	 * have to double-fork, this clone_parent() call won't be able to get the
 | 
			
		||||
	 * PID of the _actual_ init process (without doing more synchronisation than
 | 
			
		||||
	 * I can deal with at the moment). So we'll just get the parent to send it
 | 
			
		||||
	 * for us, the only job of this process is to update
 | 
			
		||||
	 * /proc/pid/{setgroups,uid_map,gid_map}.
 | 
			
		||||
	 *
 | 
			
		||||
	 * And as a result of the above, we also need to setns(2) in the first child
 | 
			
		||||
	 * because if we join a PID namespace in the topmost parent then our child
 | 
			
		||||
	 * will be in that namespace (and it will not be able to give us a PID value
 | 
			
		||||
	 * that makes sense without resorting to sending things with cmsg).
 | 
			
		||||
	 *
 | 
			
		||||
	 * This also deals with an older issue caused by dumping cloneflags into
 | 
			
		||||
	 * clone(2): On old kernels, CLONE_PARENT didn't work with CLONE_NEWPID, so
 | 
			
		||||
	 * we have to unshare(2) before clone(2) in order to do this. This was fixed
 | 
			
		||||
	 * in upstream commit 1f7f4dde5c945f41a7abc2285be43d918029ecc5, and was
 | 
			
		||||
	 * introduced by 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e. As far as we're
 | 
			
		||||
	 * aware, the last mainline kernel which had this bug was Linux 3.12.
 | 
			
		||||
	 * However, we cannot comment on which kernels the broken patch was
 | 
			
		||||
	 * backported to.
 | 
			
		||||
	 *
 | 
			
		||||
	 * -- Aleksa "what has my life come to?" Sarai
 | 
			
		||||
	 */
 | 
			
		||||
 | 
			
		||||
	switch (setjmp(env)) {
 | 
			
		||||
	/*
 | 
			
		||||
	 * Stage 0: We're in the parent. Our job is just to create a new child
 | 
			
		||||
	 *          (stage 1: JUMP_CHILD) process and write its uid_map and
 | 
			
		||||
	 *          gid_map. That process will go on to create a new process, then
 | 
			
		||||
	 *          it will send us its PID which we will send to the bootstrap
 | 
			
		||||
	 *          process.
 | 
			
		||||
	 */
 | 
			
		||||
	case JUMP_PARENT: {
 | 
			
		||||
			int len;
 | 
			
		||||
			pid_t child;
 | 
			
		||||
			char buf[JSON_MAX];
 | 
			
		||||
 | 
			
		||||
			/* For debugging. */
 | 
			
		||||
			prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0);
 | 
			
		||||
 | 
			
		||||
			/* Start the process of getting a container. */
 | 
			
		||||
			child = clone_parent(&env, JUMP_CHILD);
 | 
			
		||||
			if (child < 0)
 | 
			
		||||
				bail("unable to fork: child_func");
 | 
			
		||||
 | 
			
		||||
			/* State machine for synchronisation with the children. */
 | 
			
		||||
			while (true) {
 | 
			
		||||
				enum sync_t s;
 | 
			
		||||
 | 
			
		||||
				/* This doesn't need to be global, we're in the parent. */
 | 
			
		||||
				int syncfd = syncpipe[1];
 | 
			
		||||
 | 
			
		||||
				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
 | 
			
		||||
					bail("failed to sync with child: next state");
 | 
			
		||||
 | 
			
		||||
				switch (s) {
 | 
			
		||||
				case SYNC_ERR: {
 | 
			
		||||
						/* We have to mirror the error code of the child. */
 | 
			
		||||
						int ret;
 | 
			
		||||
 | 
			
		||||
						if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
 | 
			
		||||
							bail("failed to sync with child: read(error code)");
 | 
			
		||||
 | 
			
		||||
						exit(ret);
 | 
			
		||||
					}
 | 
			
		||||
					break;
 | 
			
		||||
				case SYNC_USERMAP_PLS:
 | 
			
		||||
					/* Enable setgroups(2) if we've been asked to. */
 | 
			
		||||
					if (config.is_setgroup)
 | 
			
		||||
						update_setgroups(child, SETGROUPS_ALLOW);
 | 
			
		||||
 | 
			
		||||
					/* Set up mappings. */
 | 
			
		||||
					update_uidmap(child, config.uidmap, config.uidmap_len);
 | 
			
		||||
					update_gidmap(child, config.gidmap, config.gidmap_len);
 | 
			
		||||
 | 
			
		||||
					s = SYNC_USERMAP_ACK;
 | 
			
		||||
					if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
 | 
			
		||||
						kill(child, SIGKILL);
 | 
			
		||||
						bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
 | 
			
		||||
					}
 | 
			
		||||
					break;
 | 
			
		||||
				case SYNC_USERMAP_ACK:
 | 
			
		||||
					/* We should _never_ receive acks. */
 | 
			
		||||
					kill(child, SIGKILL);
 | 
			
		||||
					bail("failed to sync with child: unexpected SYNC_USERMAP_ACK");
 | 
			
		||||
					break;
 | 
			
		||||
				case SYNC_RECVPID_PLS: {
 | 
			
		||||
						pid_t old = child;
 | 
			
		||||
 | 
			
		||||
						/* Get the init_func pid. */
 | 
			
		||||
						if (read(syncfd, &child, sizeof(child)) != sizeof(child)) {
 | 
			
		||||
							kill(old, SIGKILL);
 | 
			
		||||
							bail("failed to sync with child: read(childpid)");
 | 
			
		||||
						}
 | 
			
		||||
 | 
			
		||||
						/* Send ACK. */
 | 
			
		||||
						s = SYNC_RECVPID_ACK;
 | 
			
		||||
						if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
 | 
			
		||||
							kill(old, SIGKILL);
 | 
			
		||||
							kill(child, SIGKILL);
 | 
			
		||||
							bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
 | 
			
		||||
						}
 | 
			
		||||
					}
 | 
			
		||||
 | 
			
		||||
					/* Leave the loop. */
 | 
			
		||||
					goto out;
 | 
			
		||||
				case SYNC_RECVPID_ACK:
 | 
			
		||||
					/* We should _never_ receive acks. */
 | 
			
		||||
					kill(child, SIGKILL);
 | 
			
		||||
					bail("failed to sync with child: unexpected SYNC_RECVPID_ACK");
 | 
			
		||||
					break;
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
		out:
 | 
			
		||||
			/* Send the init_func pid back to our parent. */
 | 
			
		||||
			len = snprintf(buf, JSON_MAX, "{\"pid\": %d}\n", child);
 | 
			
		||||
			if (len < 0) {
 | 
			
		||||
				kill(child, SIGKILL);
 | 
			
		||||
				bail("unable to generate JSON for child pid");
 | 
			
		||||
			}
 | 
			
		||||
			if (write(pipenum, buf, len) != len) {
 | 
			
		||||
				kill(child, SIGKILL);
 | 
			
		||||
				bail("unable to send child pid to bootstrapper");
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			exit(0);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Stage 1: We're in the first child process. Our job is to join any
 | 
			
		||||
	 *          provided user namespaces in the netlink payload. If we've been
 | 
			
		||||
	 *          asked to CLONE_NEWUSER, we will unshare the user namespace and
 | 
			
		||||
	 *          ask our parent (stage 0) to set up our user mappings for us.
 | 
			
		||||
	 *          Then, we unshare the rest of the requested namespaces and
 | 
			
		||||
	 *          create a new child (stage 2: JUMP_INIT).  We then send the
 | 
			
		||||
	 *          child's PID to our parent (stage 0).
 | 
			
		||||
	 */
 | 
			
		||||
	case JUMP_CHILD: {
 | 
			
		||||
			pid_t child;
 | 
			
		||||
			enum sync_t s;
 | 
			
		||||
 | 
			
		||||
			/* We're in a child and thus need to tell the parent if we die. */
 | 
			
		||||
			syncfd = syncpipe[0];
 | 
			
		||||
 | 
			
		||||
			/* For debugging. */
 | 
			
		||||
			prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0);
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * We need to setns first. We cannot do this earlier (in stage 0)
 | 
			
		||||
			 * because of the fact that we forked to get here (the PID of
 | 
			
		||||
			 * [stage 2: JUMP_INIT] would be meaningless). We could send it
 | 
			
		||||
			 * using cmsg(3) but that's just annoying.
 | 
			
		||||
			 */
 | 
			
		||||
			if (config.namespaces)
 | 
			
		||||
				join_namespaces(config.namespaces);
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * Unshare all of the namespaces. Now, it should be noted that this
 | 
			
		||||
			 * ordering might break in the future (especially with rootless
 | 
			
		||||
			 * containers). But for now, it's not possible to split this into
 | 
			
		||||
			 * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues.
 | 
			
		||||
			 *
 | 
			
		||||
			 * Note that we don't merge this with clone() because there were
 | 
			
		||||
			 * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
 | 
			
		||||
			 * was broken, so we'll just do it the long way anyway.
 | 
			
		||||
			 */
 | 
			
		||||
			if (unshare(config.cloneflags) < 0)
 | 
			
		||||
				bail("failed to unshare namespaces");
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * Deal with user namespaces first. They are quite special, as they
 | 
			
		||||
			 * affect our ability to unshare other namespaces and are used as
 | 
			
		||||
			 * context for privilege checks.
 | 
			
		||||
			 */
 | 
			
		||||
			if (config.cloneflags & CLONE_NEWUSER) {
 | 
			
		||||
				/*
 | 
			
		||||
				 * We don't have the privileges to do any mapping here (see the
 | 
			
		||||
				 * clone_parent rant). So signal our parent to hook us up.
 | 
			
		||||
				 */
 | 
			
		||||
 | 
			
		||||
				s = SYNC_USERMAP_PLS;
 | 
			
		||||
				if (write(syncfd, &s, sizeof(s)) != sizeof(s))
 | 
			
		||||
					bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
 | 
			
		||||
 | 
			
		||||
				/* ... wait for mapping ... */
 | 
			
		||||
 | 
			
		||||
				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
 | 
			
		||||
					bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
 | 
			
		||||
				if (s != SYNC_USERMAP_ACK)
 | 
			
		||||
					bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/* TODO: What about non-namespace clone flags that we're dropping here? */
 | 
			
		||||
			child = clone_parent(&env, JUMP_INIT);
 | 
			
		||||
			if (child < 0)
 | 
			
		||||
				bail("unable to fork: init_func");
 | 
			
		||||
 | 
			
		||||
			/* Send the child to our parent, which knows what it's doing. */
 | 
			
		||||
			s = SYNC_RECVPID_PLS;
 | 
			
		||||
			if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
 | 
			
		||||
				kill(child, SIGKILL);
 | 
			
		||||
				bail("failed to sync with parent: write(SYNC_RECVPID_PLS)");
 | 
			
		||||
			}
 | 
			
		||||
			if (write(syncfd, &child, sizeof(child)) != sizeof(child)) {
 | 
			
		||||
				kill(child, SIGKILL);
 | 
			
		||||
				bail("failed to sync with parent: write(childpid)");
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/* ... wait for parent to get the pid ... */
 | 
			
		||||
 | 
			
		||||
			if (read(syncfd, &s, sizeof(s)) != sizeof(s)) {
 | 
			
		||||
				kill(child, SIGKILL);
 | 
			
		||||
				bail("failed to sync with parent: read(SYNC_RECVPID_ACK)");
 | 
			
		||||
			}
 | 
			
		||||
			if (s != SYNC_RECVPID_ACK) {
 | 
			
		||||
				kill(child, SIGKILL);
 | 
			
		||||
				bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
 | 
			
		||||
			exit(0);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Stage 2: We're the final child process, and the only process that will
 | 
			
		||||
	 *          actually return to the Go runtime. Our job is to just do the
 | 
			
		||||
	 *          final cleanup steps and then return to the Go runtime to allow
 | 
			
		||||
	 *          init_linux.go to run.
 | 
			
		||||
	 */
 | 
			
		||||
	case JUMP_INIT: {
 | 
			
		||||
			/*
 | 
			
		||||
			 * We're inside the child now, having jumped from the
 | 
			
		||||
			 * start_child() code after forking in the parent.
 | 
			
		||||
			 */
 | 
			
		||||
			int consolefd = config.consolefd;
 | 
			
		||||
 | 
			
		||||
			/* We're in a child and thus need to tell the parent if we die. */
 | 
			
		||||
			syncfd = syncpipe[0];
 | 
			
		||||
 | 
			
		||||
			/* For debugging. */
 | 
			
		||||
			prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0);
 | 
			
		||||
 | 
			
		||||
			if (setsid() < 0)
 | 
			
		||||
				bail("setsid failed");
 | 
			
		||||
 | 
			
		||||
			if (setuid(0) < 0)
 | 
			
		||||
				bail("setuid failed");
 | 
			
		||||
 | 
			
		||||
			if (setgid(0) < 0)
 | 
			
		||||
				bail("setgid failed");
 | 
			
		||||
 | 
			
		||||
			if (setgroups(0, NULL) < 0)
 | 
			
		||||
				bail("setgroups failed");
 | 
			
		||||
 | 
			
		||||
			if (consolefd != -1) {
 | 
			
		||||
				if (ioctl(consolefd, TIOCSCTTY, 0) < 0)
 | 
			
		||||
					bail("ioctl TIOCSCTTY failed");
 | 
			
		||||
				if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO)
 | 
			
		||||
					bail("failed to dup stdin");
 | 
			
		||||
				if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO)
 | 
			
		||||
					bail("failed to dup stdout");
 | 
			
		||||
				if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO)
 | 
			
		||||
					bail("failed to dup stderr");
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/* Close sync pipes. */
 | 
			
		||||
			close(syncpipe[0]);
 | 
			
		||||
			close(syncpipe[1]);
 | 
			
		||||
 | 
			
		||||
			/* Free netlink data. */
 | 
			
		||||
			nl_free(&config);
 | 
			
		||||
 | 
			
		||||
			/* Finish executing, let the Go runtime take over. */
 | 
			
		||||
			return;
 | 
			
		||||
		}
 | 
			
		||||
	default:
 | 
			
		||||
		bail("unexpected jump value");
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* Should never be reached. */
 | 
			
		||||
	bail("should never be reached");
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										12
									
								
								vendor/github.com/opencontainers/runc/libcontainer/selinux/selinux.go
									
										
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								vendor/github.com/opencontainers/runc/libcontainer/selinux/selinux.go
									
										
									
										generated
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -355,6 +355,12 @@ func FreeLxcContexts(scon string) {
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// roFileLabel is the package-level label taken from the "ro_file" entry that
// GetLxcContexts parses; GetLxcContexts falls back to the regular file label
// when that entry is empty. It stays "" until GetLxcContexts assigns it.
// NOTE(review): presumably the SELinux label intended for read-only content;
// confirm against callers.
var roFileLabel string

// GetROFileLabel returns the current value of roFileLabel. It performs no
// parsing of its own, so the result is "" when nothing has set the variable.
func GetROFileLabel() (fileLabel string) {
	fileLabel = roFileLabel
	return fileLabel
}
 | 
			
		||||
 | 
			
		||||
func GetLxcContexts() (processLabel string, fileLabel string) {
 | 
			
		||||
	var (
 | 
			
		||||
		val, key string
 | 
			
		||||
| 
						 | 
				
			
			@ -399,6 +405,9 @@ func GetLxcContexts() (processLabel string, fileLabel string) {
 | 
			
		|||
			if key == "file" {
 | 
			
		||||
				fileLabel = strings.Trim(val, "\"")
 | 
			
		||||
			}
 | 
			
		||||
			if key == "ro_file" {
 | 
			
		||||
				roFileLabel = strings.Trim(val, "\"")
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -406,6 +415,9 @@ func GetLxcContexts() (processLabel string, fileLabel string) {
 | 
			
		|||
		return "", ""
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if roFileLabel == "" {
 | 
			
		||||
		roFileLabel = fileLabel
 | 
			
		||||
	}
 | 
			
		||||
exit:
 | 
			
		||||
	//	mcs := IntToMcs(os.Getpid(), 1024)
 | 
			
		||||
	mcs := uniqMcs(1024)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										20
									
								
								vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
									
										
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										20
									
								
								vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
									
										
									
										generated
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -14,8 +14,10 @@ func GetProcessStartTime(pid int) (string, error) {
 | 
			
		|||
	if err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
	return parseStartTime(string(data))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
	parts := strings.Split(string(data), " ")
 | 
			
		||||
// parseStartTime extracts the process start time (field 22 of
// /proc/<pid>/stat, as documented in proc(5)) from the raw contents of a stat
// file.
//
// From the man page:
//
//	(22)  The time the process started after system boot.  In kernels before
//	Linux 2.6, this value was expressed in jiffies.  Since Linux 2.6, the
//	value is expressed in clock ticks (divide by sysconf(_SC_CLK_TCK)).
//
// Field 2 (comm, the executable name) is wrapped in parentheses and may itself
// contain spaces, so the line cannot simply be split on spaces from the start:
//
//	89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 ... 0 2971844 ...
//
// Only the text after the last ")" is split, which makes the first resulting
// element field 3 (state).
//
// The start time is returned verbatim as a string. An error is returned,
// rather than panicking with an out-of-range index, when the data holds fewer
// fields than required.
func parseStartTime(stat string) (string, error) {
	const (
		startTimePos      = 22 // 1-based position of starttime in the stat line
		firstPosAfterComm = 3  // 1-based position of the first field after comm
	)

	// Keep everything after the last ")". When no ")" is present LastIndex
	// yields -1, so the whole input is kept.
	tail := stat[strings.LastIndex(stat, ")")+1:]
	parts := strings.Split(strings.TrimSpace(tail), " ")

	idx := startTimePos - firstPosAfterComm
	if len(parts) <= idx {
		return "", statFormatError("invalid stat data: starttime field (22) is missing")
	}
	return parts[idx], nil
}

// statFormatError is returned by parseStartTime when the stat data is too
// short to contain the requested field.
type statFormatError string

// Error implements the error interface.
func (e statFormatError) Error() string { return string(e) }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue