// +build !windows

package execdriver

import (
	"encoding/json"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/daemon/execdriver/native/template"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/mount"
	"github.com/docker/go-units"
	"github.com/opencontainers/runc/libcontainer"
	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
	"github.com/opencontainers/runc/libcontainer/configs"
	// The configs package is imported a second time under the alias blkiodev;
	// both names refer to the same import path. The alias is used below for
	// the block IO device types.
	blkiodev "github.com/opencontainers/runc/libcontainer/configs"
)

// Mount contains information for a mount operation.
// Every field is serialized to JSON under the key given in its struct tag.
type Mount struct {
	Source      string `json:"source"`
	Destination string `json:"destination"`
	Writable    bool   `json:"writable"`
	Data        string `json:"data"`
	Propagation string `json:"mountpropagation"`
}

// Resources contains all resource configs for a driver.
// Currently these are all for cgroup configs.
//
// SetupCgroups copies every platform specific field below, except Rlimits,
// onto the cgroup resources of a container config.
type Resources struct {
	// CommonResources is embedded; its declaration is not part of this file.
	CommonResources

	// Fields below here are platform specific

	BlkioWeightDevice            []*blkiodev.WeightDevice   `json:"blkio_weight_device"`
	BlkioThrottleReadBpsDevice   []*blkiodev.ThrottleDevice `json:"blkio_throttle_read_bps_device"`
	BlkioThrottleWriteBpsDevice  []*blkiodev.ThrottleDevice `json:"blkio_throttle_write_bps_device"`
	BlkioThrottleReadIOpsDevice  []*blkiodev.ThrottleDevice `json:"blkio_throttle_read_iops_device"`
	BlkioThrottleWriteIOpsDevice []*blkiodev.ThrottleDevice `json:"blkio_throttle_write_iops_device"`
	MemorySwap                   int64                      `json:"memory_swap"`
	KernelMemory                 int64                      `json:"kernel_memory"`
	CPUQuota                     int64                      `json:"cpu_quota"`
	CpusetCpus                   string                     `json:"cpuset_cpus"`
	CpusetMems                   string                     `json:"cpuset_mems"`
	CPUPeriod                    int64                      `json:"cpu_period"`
	Rlimits                      []*units.Rlimit            `json:"rlimits"`
	OomKillDisable               bool                       `json:"oom_kill_disable"`
	MemorySwappiness             int64                      `json:"memory_swappiness"`
}

// ProcessConfig is the platform specific structure that describes a process
// that will be run inside a container.
type ProcessConfig struct { CommonProcessConfig // Fields below here are platform specific Privileged bool `json:"privileged"` User string `json:"user"` Console string `json:"-"` // dev/console path } // Ipc settings of the container // It is for IPC namespace setting. Usually different containers // have their own IPC namespace, however this specifies to use // an existing IPC namespace. // You can join the host's or a container's IPC namespace. type Ipc struct { ContainerID string `json:"container_id"` // id of the container to join ipc. HostIpc bool `json:"host_ipc"` } // Pid settings of the container // It is for PID namespace setting. Usually different containers // have their own PID namespace, however this specifies to use // an existing PID namespace. // Joining the host's PID namespace is currently the only supported // option. type Pid struct { HostPid bool `json:"host_pid"` } // UTS settings of the container // It is for UTS namespace setting. Usually different containers // have their own UTS namespace, however this specifies to use // an existing UTS namespace. // Joining the host's UTS namespace is currently the only supported // option. type UTS struct { HostUTS bool `json:"host_uts"` } // Network settings of the container type Network struct { Mtu int `json:"mtu"` ContainerID string `json:"container_id"` // id of the container to join network. NamespacePath string `json:"namespace_path"` HostNetworking bool `json:"host_networking"` } // Command wraps an os/exec.Cmd to add more metadata type Command struct { CommonCommand // Fields below here are platform specific AllowedDevices []*configs.Device `json:"allowed_devices"` AppArmorProfile string `json:"apparmor_profile"` AutoCreatedDevices []*configs.Device `json:"autocreated_devices"` CapAdd []string `json:"cap_add"` CapDrop []string `json:"cap_drop"` CgroupParent string `json:"cgroup_parent"` // The parent cgroup for this command. 
GIDMapping []idtools.IDMap `json:"gidmapping"` GroupAdd []string `json:"group_add"` Ipc *Ipc `json:"ipc"` OomScoreAdj int `json:"oom_score_adj"` Pid *Pid `json:"pid"` ReadonlyRootfs bool `json:"readonly_rootfs"` RemappedRoot *User `json:"remap_root"` SeccompProfile string `json:"seccomp_profile"` UIDMapping []idtools.IDMap `json:"uidmapping"` UTS *UTS `json:"uts"` } // SetRootPropagation sets the root mount propagation mode. func SetRootPropagation(config *configs.Config, propagation int) { config.RootPropagation = propagation } // InitContainer is the initialization of a container config. // It returns the initial configs for a container. It's mostly // defined by the default template. func InitContainer(c *Command) *configs.Config { container := template.New() container.Hostname = getEnv("HOSTNAME", c.ProcessConfig.Env) container.Cgroups.Name = c.ID container.Cgroups.Resources.AllowedDevices = c.AllowedDevices container.Devices = c.AutoCreatedDevices container.Rootfs = c.Rootfs container.Readonlyfs = c.ReadonlyRootfs // This can be overridden later by driver during mount setup based // on volume options SetRootPropagation(container, mount.RPRIVATE) // check to see if we are running in ramdisk to disable pivot root container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" // Default parent cgroup is "docker". Override if required. if c.CgroupParent != "" { container.Cgroups.Parent = c.CgroupParent } return container } func getEnv(key string, env []string) string { for _, pair := range env { parts := strings.SplitN(pair, "=", 2) if parts[0] == key { return parts[1] } } return "" } // SetupCgroups setups cgroup resources for a container. 
func SetupCgroups(container *configs.Config, c *Command) error { if c.Resources != nil { container.Cgroups.Resources.CpuShares = c.Resources.CPUShares container.Cgroups.Resources.Memory = c.Resources.Memory container.Cgroups.Resources.MemoryReservation = c.Resources.MemoryReservation container.Cgroups.Resources.MemorySwap = c.Resources.MemorySwap container.Cgroups.Resources.KernelMemory = c.Resources.KernelMemory container.Cgroups.Resources.CpusetCpus = c.Resources.CpusetCpus container.Cgroups.Resources.CpusetMems = c.Resources.CpusetMems container.Cgroups.Resources.CpuPeriod = c.Resources.CPUPeriod container.Cgroups.Resources.CpuQuota = c.Resources.CPUQuota container.Cgroups.Resources.BlkioWeight = c.Resources.BlkioWeight container.Cgroups.Resources.BlkioWeightDevice = c.Resources.BlkioWeightDevice container.Cgroups.Resources.BlkioThrottleReadBpsDevice = c.Resources.BlkioThrottleReadBpsDevice container.Cgroups.Resources.BlkioThrottleWriteBpsDevice = c.Resources.BlkioThrottleWriteBpsDevice container.Cgroups.Resources.BlkioThrottleReadIOPSDevice = c.Resources.BlkioThrottleReadIOpsDevice container.Cgroups.Resources.BlkioThrottleWriteIOPSDevice = c.Resources.BlkioThrottleWriteIOpsDevice container.Cgroups.Resources.OomKillDisable = c.Resources.OomKillDisable container.Cgroups.Resources.MemorySwappiness = c.Resources.MemorySwappiness } return nil } // Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. func getNetworkInterfaceStats(interfaceName string) (*libcontainer.NetworkInterface, error) { out := &libcontainer.NetworkInterface{Name: interfaceName} // This can happen if the network runtime information is missing - possible if the // container was created by an old version of libcontainer. if interfaceName == "" { return out, nil } type netStatsPair struct { // Where to write the output. Out *uint64 // The network stats file to read. File string } // Ingress for host veth is from the container. 
Hence tx_bytes stat on the host veth is actually number of bytes received by the container. netStats := []netStatsPair{ {Out: &out.RxBytes, File: "tx_bytes"}, {Out: &out.RxPackets, File: "tx_packets"}, {Out: &out.RxErrors, File: "tx_errors"}, {Out: &out.RxDropped, File: "tx_dropped"}, {Out: &out.TxBytes, File: "rx_bytes"}, {Out: &out.TxPackets, File: "rx_packets"}, {Out: &out.TxErrors, File: "rx_errors"}, {Out: &out.TxDropped, File: "rx_dropped"}, } for _, netStat := range netStats { data, err := readSysfsNetworkStats(interfaceName, netStat.File) if err != nil { return nil, err } *(netStat.Out) = data } return out, nil } // Reads the specified statistics available under /sys/class/net//statistics func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) { data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)) if err != nil { return 0, err } return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64) } // Stats collects all the resource usage information from a container. 
func Stats(containerDir string, containerMemoryLimit int64, machineMemory int64) (*ResourceStats, error) { f, err := os.Open(filepath.Join(containerDir, "state.json")) if err != nil { return nil, err } defer f.Close() type network struct { Type string HostInterfaceName string } state := struct { CgroupPaths map[string]string `json:"cgroup_paths"` Networks []network }{} if err := json.NewDecoder(f).Decode(&state); err != nil { return nil, err } now := time.Now() mgr := fs.Manager{Paths: state.CgroupPaths} cstats, err := mgr.GetStats() if err != nil { return nil, err } stats := &libcontainer.Stats{CgroupStats: cstats} // if the container does not have any memory limit specified set the // limit to the machines memory memoryLimit := containerMemoryLimit if memoryLimit == 0 { memoryLimit = machineMemory } for _, iface := range state.Networks { switch iface.Type { case "veth": istats, err := getNetworkInterfaceStats(iface.HostInterfaceName) if err != nil { return nil, err } stats.Interfaces = append(stats.Interfaces, istats) } } return &ResourceStats{ Stats: stats, Read: now, MemoryLimit: memoryLimit, }, nil } // User contains the uid and gid representing a Unix user type User struct { UID int `json:"root_uid"` GID int `json:"root_gid"` } // ExitStatus provides exit reasons for a container. type ExitStatus struct { // The exit code with which the container exited. ExitCode int // Whether the container encountered an OOM. OOMKilled bool }