1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Merge pull request #2887 from kleptog/master

Add mkseccomp.pl, helper script to make seccomp profiles.
This commit is contained in:
Tianon Gravi 2013-11-26 15:24:35 -08:00
commit 6344e6f258
2 changed files with 521 additions and 0 deletions

77
contrib/mkseccomp.pl Executable file
View file

@ -0,0 +1,77 @@
#!/usr/bin/perl
#
# A simple helper script to help people build seccomp profiles for
# Docker/LXC. The goal is mostly to reduce the attack surface to the
# kernel, by restricting access to rarely used, recently added or not used
# syscalls.
#
# This script processes one or more files which contain the list of system
# calls to be allowed. See mkseccomp.sample for more information how you
# can configure the list of syscalls. When run, this script produces output
# which, when stored in a file, can be passed to docker as follows:
#
# docker run -lxc-conf="lxc.seccomp=$file" <rest of arguments>
#
# The included sample file shows how to cut about a quarter of all syscalls,
# without affecting most applications.
#
# For specific situations it is possible to reduce the list further. By
# reducing the list to just those syscalls required by a certain application
# you can make it difficult for unknown/unexpected code to run.
#
# Run this script as follows:
#
# ./mkseccomp.pl < mkseccomp.sample >syscalls.list
# or
# ./mkseccomp.pl mkseccomp.sample >syscalls.list
#
# Multiple files can be specified, in which case the lists of syscalls are
# combined.
#
# By Martijn van Oosterhout <kleptog@svana.org> Nov 2013
# How it works:
#
# This program basically spawns two processes to form a chain like:
#
# <process data section to prefix __NR_> | cpp | <add header and filter unknown syscalls>
use strict;
use warnings;

# Refuse to run only when there is nothing to read: no file arguments were
# given AND standard input is an interactive terminal. Testing the terminal
# alone would also reject "mkseccomp.pl file >out" started from a shell, even
# though <> reads the named files and never touches standard input then.
if( !@ARGV && -t STDIN ) {
    print STDERR "Helper script to make seccomp filters for Docker/LXC.\n";
    print STDERR "Usage: mkseccomp.pl [files...]\n";
    exit 1;
}

# First fork: everything the child (and its descendants) writes to STDOUT
# can be read here through $in.
my $pid = open(my $in, "-|") // die "Couldn't fork1 ($!)\n";

if($pid == 0) { # Child
    # Second fork: whatever is printed to $out arrives on the STDIN of the
    # grandchild, whose STDOUT is still the pipe back to the top process.
    $pid = open(my $out, "|-") // die "Couldn't fork2 ($!)\n";

    if($pid == 0) { # Child, which execs cpp
        # exec only returns on failure, so the die is the sole way past it.
        exec "cpp" or die "Couldn't exec cpp ($!)\n";
    }

    # Feed cpp: the header that defines the __NR_* macros, followed by every
    # input line (from the files named in @ARGV, or STDIN) prefixed with
    # __NR_. Only lines that start with a word character are forwarded;
    # anything else (blank lines, lines starting with a comment marker or
    # with '#') is dropped here and never reaches cpp.
    print $out "#include <sys/syscall.h>\n";
    while(<>) {
        if(/^\w/) {
            print $out "__NR_$_";
        }
    }

    # Closing a piped handle waits for cpp and returns false when it exited
    # with a non-zero status; pass that on through our own exit status.
    close $out or exit 1;
    exit 0;
}

# Print header and then process output from cpp. Names that cpp could not
# expand are still spelled __NR_..., and cpp's own line markers start with
# '#', so keeping only the lines that begin with a digit leaves the numbers.
print "1\n";
print "whitelist\n";
while(<$in>) {
    print if( /^[0-9]/ );
}

# Reap the child chain; a false return means the feeder or cpp failed, in
# which case the list printed above cannot be trusted.
close $in or die "mkseccomp.pl: preprocessing with cpp failed, output is incomplete\n";

444
contrib/mkseccomp.sample Normal file
View file

@ -0,0 +1,444 @@
/* This sample file is an example for mkseccomp.pl to produce a seccomp file
* which restricts syscalls that are only useful for an admin but allows the
* vast majority of normal userspace programs to run normally.
*
* The format of this file is one line per syscall. This is then processed
* and passed to 'cpp' to convert the names to numbers using whatever is
* correct for your platform. As such C-style comments are permitted. Note
* this also means that C preprocessor macros are also allowed. So it is
* possible to create groups surrounded by #ifdef/#endif and control their
* inclusion via #define (not #include).
*
* Syscalls that don't exist on your architecture are silently filtered out.
* Syscalls marked with (*) are required for a container to spawn a bash
* shell successfully (not necessarily full featured). Listing the same
* syscall multiple times is no problem.
*
* If you want to make a list specifically for one application the easiest
* way is to run the application under strace, like so:
*
* $ strace -f -q -c -o strace.out application args...
*
* Once you have a reasonable sample of the execution of the program, exit
* it. The file strace.out will have a summary of the syscalls used. Copy
* that list into this file, comment out everything else except the starred
* syscalls (which you need for the container to start) and you're done.
*
* To get the list of syscalls from the strace output this works well for
* me
*
* $ cut -c52- < strace.out
*
* This sample list was compiled as a combination of all the syscalls
* available on i386 and amd64 on Ubuntu Precise, as such it may not contain
* everything and not everything may be relevant for your system. This
* shouldn't be a problem.
*/
// Filesystem/File descriptor related
access // (*)
chdir // (*)
chmod
chown
chown32
close // (*)
creat
dup // (*)
dup2 // (*)
dup3
epoll_create
epoll_create1
epoll_ctl
epoll_ctl_old
epoll_pwait
epoll_wait
epoll_wait_old
eventfd
eventfd2
faccessat // (*)
fadvise64
fadvise64_64
fallocate
fanotify_init
fanotify_mark
ioctl // (*)
fchdir
fchmod
fchmodat
fchown
fchown32
fchownat
fcntl // (*)
fcntl64
fdatasync
fgetxattr
flistxattr
flock
fremovexattr
fsetxattr
fstat // (*)
fstat64
fstatat64
fstatfs
fstatfs64
fsync
ftruncate
ftruncate64
getcwd // (*)
getdents // (*)
getdents64
getxattr
inotify_add_watch
inotify_init
inotify_init1
inotify_rm_watch
io_cancel
io_destroy
io_getevents
io_setup
io_submit
lchown
lchown32
lgetxattr
link
linkat
listxattr
llistxattr
llseek
_llseek
lremovexattr
lseek // (*)
lsetxattr
lstat
lstat64
mkdir
mkdirat
mknod
mknodat
newfstatat
_newselect
oldfstat
oldlstat
oldolduname
oldstat
olduname
oldwait4
open // (*)
openat // (*)
pipe // (*)
pipe2
poll
ppoll
pread64
preadv
futimesat
pselect6
pwrite64
pwritev
read // (*)
readahead
readdir
readlink
readlinkat
readv
removexattr
rename
renameat
rmdir
select
sendfile
sendfile64
setxattr
splice
stat // (*)
stat64
statfs // (*)
statfs64
symlink
symlinkat
sync
sync_file_range
sync_file_range2
syncfs
tee
truncate
truncate64
umask
unlink
unlinkat
ustat
utime
utimensat
utimes
write // (*)
writev
// Network related
accept
accept4
bind // (*)
connect // (*)
getpeername
getsockname // (*)
getsockopt
listen
recv
recvfrom // (*)
recvmmsg
recvmsg
send
sendmmsg
sendmsg
sendto // (*)
setsockopt
shutdown
socket // (*)
socketcall
socketpair
// Signal related
pause
rt_sigaction // (*)
rt_sigpending
rt_sigprocmask // (*)
rt_sigqueueinfo
rt_sigreturn // (*)
rt_sigsuspend
rt_sigtimedwait
rt_tgsigqueueinfo
sigaction
sigaltstack // (*)
signal
signalfd
signalfd4
sigpending
sigprocmask
sigreturn
sigsuspend
// Other needed POSIX
alarm
brk // (*)
clock_adjtime
clock_getres
clock_gettime
clock_nanosleep
//clock_settime
gettimeofday
nanosleep
nice
sysinfo
syslog
time
timer_create
timer_delete
timerfd_create
timerfd_gettime
timerfd_settime
timer_getoverrun
timer_gettime
timer_settime
times
uname // (*)
// Memory control
madvise
mbind
mincore
mlock
mlockall
mmap // (*)
mmap2
mprotect // (*)
mremap
msync
munlock
munlockall
munmap // (*)
remap_file_pages
set_mempolicy
vmsplice
// Process control
capget
//capset
clone // (*)
execve // (*)
exit // (*)
exit_group // (*)
fork
getcpu
getpgid
getpgrp // (*)
getpid // (*)
getppid // (*)
getpriority
getresgid
getresgid32
getresuid
getresuid32
getrlimit // (*)
getrusage
getsid
getuid // (*)
getuid32
getegid // (*)
getegid32
geteuid // (*)
geteuid32
getgid // (*)
getgid32
getgroups
getgroups32
getitimer
get_mempolicy
kill
//personality
prctl
prlimit64
sched_getaffinity
sched_getparam
sched_get_priority_max
sched_get_priority_min
sched_getscheduler
sched_rr_get_interval
//sched_setaffinity
//sched_setparam
//sched_setscheduler
sched_yield
setfsgid
setfsgid32
setfsuid
setfsuid32
setgid
setgid32
setgroups
setgroups32
setitimer
setpgid // (*)
setpriority
setregid
setregid32
setresgid
setresgid32
setresuid
setresuid32
setreuid
setreuid32
setrlimit
setsid
setuid
setuid32
ugetrlimit
vfork
wait4 // (*)
waitid
waitpid
// IPC
ipc
mq_getsetattr
mq_notify
mq_open
mq_timedreceive
mq_timedsend
mq_unlink
msgctl
msgget
msgrcv
msgsnd
semctl
semget
semop
semtimedop
shmat
shmctl
shmdt
shmget
// Linux specific, mostly needed for thread-related stuff
arch_prctl // (*)
get_robust_list
get_thread_area
gettid
futex // (*)
restart_syscall // (*)
set_robust_list // (*)
set_thread_area
set_tid_address // (*)
tgkill
tkill
// Admin syscalls, these are blocked
//acct
//adjtimex
//bdflush
//chroot
//create_module
//delete_module
//get_kernel_syms // Obsolete
//idle // Obsolete
//init_module
//ioperm
//iopl
//ioprio_get
//ioprio_set
//kexec_load
//lookup_dcookie // oprofile only?
//migrate_pages // NUMA
//modify_ldt
//mount
//move_pages // NUMA
//name_to_handle_at // NFS server
//nfsservctl // NFS server
//open_by_handle_at // NFS server
//perf_event_open
//pivot_root
//process_vm_readv // For debugger
//process_vm_writev // For debugger
//ptrace // For debugger
//query_module
//quotactl
//reboot
//setdomainname
//sethostname
//setns
//settimeofday
//sgetmask // Obsolete
//ssetmask // Obsolete
//stime
//swapoff
//swapon
//_sysctl
//sysfs
//sys_setaltroot
//umount
//umount2
//unshare
//uselib
//vhangup
//vm86
//vm86old
// Kernel key management
//add_key
//keyctl
//request_key
// Unimplemented
//afs_syscall
//break
//ftime
//getpmsg
//gtty
//lock
//madvise1
//mpx
//prof
//profil
//putpmsg
//security
//stty
//tuxcall
//ulimit
//vserver