Signed-off-by: John Howard <jhoward@microsoft.com>
This commit is contained in:
John Howard 2019-03-04 12:44:25 -08:00
parent 25dff4b4ab
commit d1cb9a47ec
4 changed files with 337 additions and 123 deletions

View File

@ -6,7 +6,7 @@ github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a
github.com/go-check/check 4ed411733c5785b40214c70bce814c3a3a689609 https://github.com/cpuguy83/check.git
github.com/golang/gddo 9b12a26f3fbd7397dee4e20939ddca719d840d2a
github.com/gorilla/mux v1.7.0
github.com/Microsoft/opengcs v0.3.9
github.com/Microsoft/opengcs a10967154e143a36014584a6f664344e3bb0aa64
github.com/kr/pty 5cf931ef8f
github.com/mattn/go-shellwords v1.0.3
github.com/sirupsen/logrus v1.0.6

View File

@ -18,24 +18,6 @@ import (
type Mode uint
const (
// Constants for the actual mode after validation
// ModeActualError means an error has occurred during validation
ModeActualError = iota
// ModeActualVhdx means that we are going to use VHDX boot after validation
ModeActualVhdx
// ModeActualKernelInitrd means that we are going to use kernel+initrd for boot after validation
ModeActualKernelInitrd
// Constants for the requested mode
// ModeRequestAuto means auto-select the boot mode for a utility VM
ModeRequestAuto = iota // VHDX will be priority over kernel+initrd
// ModeRequestVhdx means request VHDX boot if possible
ModeRequestVhdx
// ModeRequestKernelInitrd means request Kernel+initrd boot if possible
ModeRequestKernelInitrd
// defaultUvmTimeoutSeconds is the default time to wait for utility VM operations
defaultUvmTimeoutSeconds = 5 * 60
@ -54,8 +36,6 @@ const (
type Config struct {
Options // Configuration options
Name string // Name of the utility VM
RequestedMode Mode // What mode is preferred when validating
ActualMode Mode // What mode was obtained during validation
UvmTimeoutSeconds int // How long to wait for the utility VM to respond in seconds
Uvm hcsshim.Container // The actual container
MappedVirtualDisks []hcsshim.MappedVirtualDisk // Data-disks to be attached
@ -66,9 +46,8 @@ type Options struct {
KirdPath string // Path to where kernel/initrd are found (defaults to %PROGRAMFILES%\Linux Containers)
KernelFile string // Kernel for Utility VM (embedded in a UEFI bootloader) - does NOT include full path, just filename
InitrdFile string // Initrd image for Utility VM - does NOT include full path, just filename
Vhdx string // VHD for booting the utility VM - is a full path
TimeoutSeconds int // Requested time for the utility VM to respond in seconds (may be over-ridden by environment)
BootParameters string // Additional boot parameters for initrd booting (not VHDx)
BootParameters string // Additional boot parameters for initrd booting
}
// ParseOptions parses a set of K-V pairs into options used by opengcs. Note
@ -86,8 +65,6 @@ func ParseOptions(options []string) (Options, error) {
rOpts.KernelFile = opt[1]
case "lcow.initrd":
rOpts.InitrdFile = opt[1]
case "lcow.vhdx":
rOpts.Vhdx = opt[1]
case "lcow.bootparameters":
rOpts.BootParameters = opt[1]
case "lcow.timeout":
@ -106,9 +83,6 @@ func ParseOptions(options []string) (Options, error) {
if rOpts.KirdPath == "" {
rOpts.KirdPath = filepath.Join(os.Getenv("ProgramFiles"), "Linux Containers")
}
if rOpts.Vhdx == "" {
rOpts.Vhdx = filepath.Join(rOpts.KirdPath, `uvm.vhdx`)
}
if rOpts.KernelFile == "" {
rOpts.KernelFile = `kernel`
}
@ -157,47 +131,11 @@ func (config *Config) GenerateDefault(options []string) error {
// Last priority is the default timeout
config.UvmTimeoutSeconds = defaultUvmTimeoutSeconds
// Set the default requested mode
config.RequestedMode = ModeRequestAuto
return nil
}
// Validate validates a Config structure for starting a utility VM.
func (config *Config) Validate() error {
config.ActualMode = ModeActualError
if config.RequestedMode == ModeRequestVhdx && config.Vhdx == "" {
return fmt.Errorf("VHDx mode must supply a VHDx")
}
if config.RequestedMode == ModeRequestKernelInitrd && (config.KernelFile == "" || config.InitrdFile == "") {
return fmt.Errorf("kernel+initrd mode must supply both kernel and initrd")
}
// Validate that if VHDX requested or auto, it exists.
if config.RequestedMode == ModeRequestAuto || config.RequestedMode == ModeRequestVhdx {
if _, err := os.Stat(config.Vhdx); os.IsNotExist(err) {
if config.RequestedMode == ModeRequestVhdx {
return fmt.Errorf("VHDx '%s' not found", config.Vhdx)
}
} else {
config.ActualMode = ModeActualVhdx
// Can't specify boot parameters with VHDx
if config.BootParameters != "" {
return fmt.Errorf("Boot parameters cannot be specified in VHDx mode")
}
return nil
}
}
// So must be kernel+initrd, or auto where we fallback as the VHDX doesn't exist
if config.InitrdFile == "" || config.KernelFile == "" {
if config.RequestedMode == ModeRequestKernelInitrd {
return fmt.Errorf("initrd and kernel options must be supplied")
}
return fmt.Errorf("opengcs: configuration is invalid")
}
if _, err := os.Stat(filepath.Join(config.KirdPath, config.KernelFile)); os.IsNotExist(err) {
return fmt.Errorf("kernel '%s' not found", filepath.Join(config.KirdPath, config.KernelFile))
@ -206,8 +144,6 @@ func (config *Config) Validate() error {
return fmt.Errorf("initrd '%s' not found", filepath.Join(config.KirdPath, config.InitrdFile))
}
config.ActualMode = ModeActualKernelInitrd
// Ensure all the MappedVirtualDisks exist on the host
for _, mvd := range config.MappedVirtualDisks {
if _, err := os.Stat(mvd.HostPath); err != nil {
@ -236,21 +172,12 @@ func (config *Config) StartUtilityVM() error {
ContainerType: "linux",
TerminateOnLastHandleClosed: true,
MappedVirtualDisks: config.MappedVirtualDisks,
}
if config.ActualMode == ModeActualVhdx {
configuration.HvRuntime = &hcsshim.HvRuntime{
ImagePath: config.Vhdx,
BootSource: "Vhd",
WritableBootSource: false,
}
} else {
configuration.HvRuntime = &hcsshim.HvRuntime{
HvRuntime: &hcsshim.HvRuntime{
ImagePath: config.KirdPath,
LinuxInitrdFile: config.InitrdFile,
LinuxKernelFile: config.KernelFile,
LinuxBootParameters: config.BootParameters,
}
},
}
configurationS, _ := json.Marshal(configuration)

View File

@ -1,46 +0,0 @@
// +build windows
package client
import (
"fmt"
"io"
"github.com/sirupsen/logrus"
)
// TarToVhd feeds the tar stream read from reader to a `tar2vhd` process
// started through createUtilsProcess, and writes whatever that process emits
// on stdout to targetVHDFile. It returns the number of bytes written to the
// file. An error is returned when no utility VM is present in the
// configuration, or when any step of the transfer fails.
func (config *Config) TarToVhd(targetVHDFile string, reader io.Reader) (int64, error) {
	logrus.Debugf("opengcs: TarToVhd: %s", targetVHDFile)

	// A utility VM handle is required before anything can be started.
	if config.Uvm == nil {
		return 0, fmt.Errorf("cannot Tar2Vhd as no utility VM is in configuration")
	}

	defer config.DebugGCS()

	proc, err := config.createUtilsProcess("tar2vhd")
	if err != nil {
		return 0, fmt.Errorf("failed to start tar2vhd for %s: %s", targetVHDFile, err)
	}
	defer proc.Process.Close()

	// Push the complete tar stream into the stdin of `tar2vhd`.
	stdinContext := fmt.Sprintf("stdin of tar2vhd for generating %s", targetVHDFile)
	if _, err = copyWithTimeout(proc.Stdin, reader, 0, config.UvmTimeoutSeconds, stdinContext); err != nil {
		return 0, fmt.Errorf("failed sending to tar2vhd for %s: %s", targetVHDFile, err)
	}

	// Closing stdin is the signal to GCS that no more data will be sent.
	if closeErr := proc.Process.CloseStdin(); closeErr != nil {
		return 0, fmt.Errorf("failed closing stdin handle for %s: %s", targetVHDFile, closeErr)
	}

	// Persist the stdout of `tar2vhd` as the VHD file.
	stdoutContext := fmt.Sprintf("stdout of tar2vhd to %s", targetVHDFile)
	written, err := writeFileFromReader(targetVHDFile, proc.Stdout, config.UvmTimeoutSeconds, stdoutContext)
	if err != nil {
		return 0, fmt.Errorf("failed to write %s during tar2vhd: %s", targetVHDFile, err)
	}

	logrus.Debugf("opengcs: TarToVhd: %s created, %d bytes", targetVHDFile, written)
	return written, nil
}

333
vendor/github.com/Microsoft/opengcs/init/init.c generated vendored Normal file
View File

@ -0,0 +1,333 @@
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
// PATH assignment used for launched children: launch() installs it with
// putenv() so execvpe() can resolve the program name, and it is the only
// entry of default_envp.
#define DEFAULT_PATH_ENV "PATH=/sbin:/usr/sbin:/bin:/usr/bin"
// NULL-terminated environment handed to execvpe() by launch().
const char *const default_envp[] = {
DEFAULT_PATH_ENV,
NULL,
};
// When nothing is passed, default to the LCOWv1 behavior.
// Note: this array is NOT NULL-terminated; main() derives its length with
// sizeof and launch() appends the terminator before exec.
const char *const default_argv[] = { "/bin/gcs", "-loglevel", "debug", "-logfile=/tmp/gcs.log" };
// Debug shell that main() launches when no command-line arguments are given.
const char *const default_shell = "/bin/sh";
// Arguments for one mount(2) call issued by init_fs(): the fields are passed
// positionally as mount(source, target, type, flags, data).
struct Mount {
const char *source, *target, *type;
unsigned long flags;
const void *data;
};
// Arguments for one mkdir(2) call issued by init_fs(): mkdir(path, mode).
struct Mkdir {
const char *path;
mode_t mode;
};
// Arguments for one mknod(2) call issued by init_fs(); major and minor are
// combined with makedev() to form the device number.
struct Mknod {
const char *path;
mode_t mode;
int major, minor;
};
// Arguments for one symlink(2) call issued by init_fs(). Note the field order
// (linkpath first) is the reverse of the symlink(target, linkpath) argument
// order; positional initializers must list the link path first.
struct Symlink {
const char *linkpath, *target;
};
// Discriminator for struct InitOp: selects which kind of filesystem
// operation init_fs() performs for an entry.
enum OpType {
OpMount,
OpMkdir,
OpMknod,
OpSymlink,
};
// One filesystem setup step. `op` says which member of the anonymous union
// holds the arguments for this step (OpMount -> mount, OpMkdir -> mkdir,
// OpMknod -> mknod, OpSymlink -> symlink).
struct InitOp {
enum OpType op;
union {
struct Mount mount;
struct Mkdir mkdir;
struct Mknod mknod;
struct Symlink symlink;
};
};
// Filesystem setup steps that main() passes to init_fs(), which executes them
// in array order. Order matters: directories are created before being used as
// mount points, and the tmpfs mounted at /sys/fs/cgroup is the parent under
// which init_cgroups() later creates one directory per controller.
// Symlink entries are written as { linkpath, target }.
const struct InitOp ops[] = {
// mount /proc (which should already exist)
{ OpMount, .mount = { "proc", "/proc", "proc", MS_NODEV | MS_NOSUID | MS_NOEXEC } },
// add symlinks in /dev (which is already mounted)
{ OpSymlink, .symlink = { "/dev/fd", "/proc/self/fd" } },
{ OpSymlink, .symlink = { "/dev/stdin", "/proc/self/fd/0" } },
{ OpSymlink, .symlink = { "/dev/stdout", "/proc/self/fd/1" } },
{ OpSymlink, .symlink = { "/dev/stderr", "/proc/self/fd/2" } },
// mount tmpfs on /run and /tmp (which should already exist)
{ OpMount, .mount = { "tmpfs", "/run", "tmpfs", MS_NODEV | MS_NOSUID | MS_NOEXEC, "mode=0755" } },
{ OpMount, .mount = { "tmpfs", "/tmp", "tmpfs", MS_NODEV | MS_NOSUID | MS_NOEXEC } },
// mount shm and devpts
{ OpMkdir, .mkdir = { "/dev/shm", 0755 } },
{ OpMount, .mount = { "shm", "/dev/shm", "tmpfs", MS_NODEV | MS_NOSUID | MS_NOEXEC } },
{ OpMkdir, .mkdir = { "/dev/pts", 0755 } },
{ OpMount, .mount = { "devpts", "/dev/pts", "devpts", MS_NOSUID | MS_NOEXEC } },
// mount /sys (which should already exist)
{ OpMount, .mount = { "sysfs", "/sys", "sysfs", MS_NODEV | MS_NOSUID | MS_NOEXEC } },
{ OpMount, .mount = { "cgroup_root", "/sys/fs/cgroup", "tmpfs", MS_NODEV | MS_NOSUID | MS_NOEXEC, "mode=0755" } },
};
// Reports the current errno on stderr through perror(), prefixed with msg.
// errno is captured before the call and put back afterwards, so callers can
// still inspect the original error code once the message has been printed.
void warn(const char *msg) {
    const int saved_errno = errno;

    perror(msg);
    errno = saved_errno;
}
// Two-part variant of warn(): writes "msg1: " to stderr and then lets warn()
// print msg2 together with the errno description. The errno value seen on
// entry is reinstated before warn() runs, so the stderr writes made here
// cannot change which error gets reported.
void warn2(const char *msg1, const char *msg2) {
    const int saved_errno = errno;

    fputs(msg1, stderr);
    fputs(": ", stderr);

    errno = saved_errno;
    warn(msg2);
}
// Terminates the process with exit(), using the current value of errno as the
// exit status. Never returns.
_Noreturn void dien() {
exit(errno);
}
// Prints msg with the current errno description via warn(), then terminates
// the process through dien(). warn() leaves errno unchanged, so the exit
// status is the error that was reported. Never returns.
_Noreturn void die(const char *msg) {
warn(msg);
dien();
}
// Two-part variant of die(): prints "msg1: msg2: <errno description>" via
// warn2(), then terminates the process through dien(). Never returns.
_Noreturn void die2(const char *msg1, const char *msg2) {
warn2(msg1, msg2);
dien();
}
// Mounts devtmpfs on /dev. A failure is always reported, but only treated as
// fatal when it is something other than EBUSY.
void init_dev() {
    if (mount("dev", "/dev", "devtmpfs", MS_NOSUID | MS_NOEXEC, NULL) >= 0) {
        return;
    }

    warn2("mount", "/dev");

    // The kernel mounts /dev by itself when devtmpfs.mount = 1 is given on
    // the command line or CONFIG_DEVTMPFS_MOUNT is set; in that case the
    // mount above fails with EBUSY, which is not an error for our purposes.
    if (errno != EBUSY) {
        dien();
    }
}
void init_fs(const struct InitOp *ops, size_t count) {
for (size_t i = 0; i < count; i++) {
switch (ops[i].op) {
case OpMount: {
const struct Mount *m = &ops[i].mount;
if (mount(m->source, m->target, m->type, m->flags, m->data) < 0) {
die2("mount", m->target);
}
break;
}
case OpMkdir: {
const struct Mkdir *m = &ops[i].mkdir;
if (mkdir(m->path, m->mode) < 0) {
warn2("mkdir", m->path);
if (errno != EEXIST) {
dien();
}
}
break;
}
case OpMknod: {
const struct Mknod *n = &ops[i].mknod;
if (mknod(n->path, n->mode, makedev(n->major, n->minor)) < 0) {
warn2("mknod", n->path);
if (errno != EEXIST) {
dien();
}
}
break;
}
case OpSymlink: {
const struct Symlink *sl = &ops[i].symlink;
if (symlink(sl->target, sl->linkpath) < 0) {
warn2("symlink", sl->linkpath);
if (errno != EEXIST) {
dien();
}
}
break;
}
}
}
}
// Mounts one cgroup hierarchy per enabled controller listed in /proc/cgroups.
//
// The first line of the file is skipped. Every following line is parsed as
// "<name> <hier> <groups> <enabled>"; for each entry whose last field is
// non-zero, the directory /sys/fs/cgroup/<name> is created and a "cgroup"
// filesystem is mounted on it with <name> as both source and mount data.
// Any failure (open, parse, mkdir, mount) terminates the process.
void init_cgroups() {
    const char *fpath = "/proc/cgroups";
    FILE *f = fopen(fpath, "r");
    if (f == NULL) {
        die2("fopen", fpath);
    }

    // Skip the first line. The result of fgetc() must be kept in an int:
    // EOF is an int value outside the range of unsigned char, so narrowing
    // to char either makes the EOF test never succeed (unsigned char
    // platforms, causing an endless loop) or mistakes byte 0xFF for EOF.
    for (;;) {
        int c = fgetc(f);
        if (c == EOF || c == '\n') {
            break;
        }
    }

    for (;;) {
        static const char base_path[] = "/sys/fs/cgroup/";
        // Space for the base path (without its NUL), a controller name of up
        // to 64 characters as allowed by "%64s" below, and the terminating
        // NUL that fscanf stores after the name.
        char path[sizeof(base_path) - 1 + 64 + 1];
        // The name is scanned directly into the tail of `path`, so prefixing
        // the base path afterwards yields the full mount point.
        char *name = path + sizeof(base_path) - 1;
        int hier, groups, enabled;
        int r = fscanf(f, "%64s %d %d %d\n", name, &hier, &groups, &enabled);
        if (r == EOF) {
            break;
        }
        if (r != 4) {
            // A malformed line does not necessarily set errno; make sure the
            // process does not exit with status 0 in that case.
            if (errno == 0) {
                errno = EINVAL;
            }
            die2("fscanf", fpath);
        }
        if (enabled) {
            memcpy(path, base_path, sizeof(base_path) - 1);
            if (mkdir(path, 0755) < 0) {
                die2("mkdir", path);
            }
            if (mount(name, path, "cgroup", MS_NODEV | MS_NOSUID | MS_NOEXEC, name) < 0) {
                die2("mount", path);
            }
        }
    }
    fclose(f);
}
// Brings the network interface `iface` up.
//
// A datagram socket of the given address family (`domain`) is opened purely
// as a handle for the interface ioctls. The current interface flags are read,
// IFF_UP and IFF_RUNNING are added, and the result is written back.
//
// If the address family is not supported (socket() fails with EAFNOSUPPORT)
// the function returns without doing anything; every other failure
// terminates the process.
void init_network(const char *iface, int domain) {
    int s = socket(domain, SOCK_DGRAM, IPPROTO_IP);
    if (s < 0) {
        if (errno == EAFNOSUPPORT) {
            return;
        }
        die("socket");
    }

    struct ifreq request = {0};
    // strncpy() does not NUL-terminate when the source fills the whole
    // destination. Copy at most size-1 bytes so the final byte, already zero
    // from the initializer above, always terminates the interface name.
    strncpy(request.ifr_name, iface, sizeof(request.ifr_name) - 1);

    if (ioctl(s, SIOCGIFFLAGS, &request) < 0) {
        die2("ioctl(SIOCGIFFLAGS)", iface);
    }

    request.ifr_flags |= IFF_UP | IFF_RUNNING;
    if (ioctl(s, SIOCSIFFLAGS, &request) < 0) {
        die2("ioctl(SIOCSIFFLAGS)", iface);
    }

    close(s);
}
// Forks and executes argv[0] with the first `argc` entries of `argv` as its
// argument vector, returning the child's pid to the caller.
//
// `argv` does not need to be NULL-terminated: a terminated copy is built
// before exec. The program is looked up using DEFAULT_PATH_ENV and receives
// default_envp as its environment.
//
// A fork failure terminates the calling process. Failures after the fork
// (empty command, putenv, exec) terminate only the child.
pid_t launch(int argc, char **argv) {
    pid_t pid = fork();
    if (pid != 0) {
        if (pid < 0) {
            die("fork");
        }
        return pid;
    }

    // Everything below runs in the child only.

    // Unblock signals before execing.
    sigset_t set;
    sigfillset(&set);
    sigprocmask(SIG_UNBLOCK, &set, 0);

    // Create a session and process group.
    setsid();
    setpgid(0, 0);

    // Without at least a program name there is nothing to exec; passing a
    // NULL file name to execvpe() would be undefined behaviour.
    if (argc < 1) {
        errno = EINVAL;
        die("launch: no command given");
    }

    // Terminate the arguments and exec.
    char **argvn = alloca(sizeof(argv[0]) * (argc + 1));
    memcpy(argvn, argv, sizeof(argv[0]) * argc);
    argvn[argc] = NULL;
    if (putenv(DEFAULT_PATH_ENV)) { // Specify the PATH used for execvpe
        die("putenv");
    }
    execvpe(argvn[0], argvn, (char**)default_envp);
    // Only reached when the exec itself failed.
    die2("execvpe", argvn[0]);
}
// Waits for child processes until the one identified by until_pid terminates,
// discarding the status of any other child collected on the way.
//
// Returns the exit status of that child when it exited normally, or
// 128 + the terminating signal number otherwise. A failing wait() terminates
// the process.
int reap_until(pid_t until_pid) {
    for (;;) {
        int wstatus;
        const pid_t reaped = wait(&wstatus);
        if (reaped < 0) {
            die("wait");
        }

        // Some other child was collected; keep waiting for the one we want.
        if (reaped != until_pid) {
            continue;
        }

        // The initial child process died. Pass through the exit status.
        if (!WIFEXITED(wstatus)) {
            fputs("child exited by signal\n", stderr);
            return 128 + WTERMSIG(wstatus);
        }

        const int exit_code = WEXITSTATUS(wstatus);
        if (exit_code != 0) {
            fputs("child exited with error\n", stderr);
        }
        return exit_code;
    }
}
// Entry point of the init process.
//
// Command line: [-d <debug shell>] <program> [args...]
// With no arguments at all, default_argv is run as the program and
// default_shell is used as the debug shell.
//
// After blocking all signals, the device, filesystem, cgroup and loopback
// network setup is performed, then the program is launched. When a debug
// shell was requested it is launched as well and becomes the process whose
// termination ends init. The return value is whatever reap_until() reports
// for that process.
int main(int argc, char **argv) {
    char *debug_shell = NULL;

    if (argc > 1) {
        // The leading '+' in the option string makes getopt stop at the first
        // non-option argument, so the child's own options are left untouched.
        int opt;
        while ((opt = getopt(argc, argv, "+d:")) >= 0) {
            if (opt != 'd') {
                exit(1);
            }
            debug_shell = optarg;
        }
    } else {
        // Nothing was passed: fall back to the built-in command line. optind
        // is reset to 0 so the child argument vector computed below starts at
        // the first element of default_argv.
        argv = (char **)default_argv;
        argc = sizeof(default_argv) / sizeof(default_argv[0]);
        optind = 0;
        debug_shell = (char*)default_shell;
    }

    // Whatever follows the parsed options is the child's command line.
    int child_argc = argc - optind;
    char **child_argv = argv + optind;

    // Block all signals in init. SIGCHLD will still cause wait() to return.
    sigset_t all_signals;
    sigfillset(&all_signals);
    sigprocmask(SIG_BLOCK, &all_signals, 0);

    init_dev();
    init_fs(ops, sizeof(ops) / sizeof(ops[0]));
    init_cgroups();
    init_network("lo", AF_INET);
    init_network("lo", AF_INET6);

    pid_t primary = launch(child_argc, child_argv);
    if (debug_shell != NULL) {
        // The debug shell takes over as the primary child.
        primary = launch(1, &debug_shell);
    }

    // Reap until the initial child process dies.
    return reap_until(primary);
}