2013-12-21 11:02:06 -05:00
|
|
|
package cgroups
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"fmt"
|
2014-01-20 13:52:26 -05:00
|
|
|
"github.com/dotcloud/docker/pkg/mount"
|
2013-12-21 11:02:06 -05:00
|
|
|
"io"
|
2014-02-20 18:48:48 -05:00
|
|
|
"io/ioutil"
|
2013-12-21 11:02:06 -05:00
|
|
|
"os"
|
2014-02-20 18:48:48 -05:00
|
|
|
"path/filepath"
|
|
|
|
"strconv"
|
2013-12-21 11:02:06 -05:00
|
|
|
"strings"
|
|
|
|
)
|
|
|
|
|
2014-02-20 18:48:48 -05:00
|
|
|
// Cgroup describes a control group and the resource settings that are applied
// to the processes placed in it.
type Cgroup struct {
	Name   string `json:"name,omitempty"`
	Parent string `json:"parent,omitempty"` // name of parent cgroup or slice

	DeviceAccess bool  `json:"device_access,omitempty"` // when false, setupDevices denies all devices except a fixed allow list
	Memory       int64 `json:"memory,omitempty"`        // Memory limit (in bytes)
	MemorySwap   int64 `json:"memory_swap,omitempty"`   // Total memory usage (memory + swap); set `-1' to disable swap
	CpuShares    int64 `json:"cpu_shares,omitempty"`    // CPU shares (relative weight vs. other containers)
}
|
|
|
|
|
2014-01-15 20:26:04 -05:00
|
|
|
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
|
2013-12-21 11:02:06 -05:00
|
|
|
func FindCgroupMountpoint(subsystem string) (string, error) {
|
|
|
|
mounts, err := mount.GetMounts()
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, mount := range mounts {
|
|
|
|
if mount.Fstype == "cgroup" {
|
|
|
|
for _, opt := range strings.Split(mount.VfsOpts, ",") {
|
|
|
|
if opt == subsystem {
|
|
|
|
return mount.Mountpoint, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return "", fmt.Errorf("cgroup mountpoint not found for %s", subsystem)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the relative path to the cgroup docker is running in.
|
2014-01-28 10:17:51 -05:00
|
|
|
func GetThisCgroupDir(subsystem string) (string, error) {
|
2013-12-21 11:02:06 -05:00
|
|
|
f, err := os.Open("/proc/self/cgroup")
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
return parseCgroupFile(subsystem, f)
|
|
|
|
}
|
|
|
|
|
2014-02-20 17:12:08 -05:00
|
|
|
func GetInitCgroupDir(subsystem string) (string, error) {
|
|
|
|
f, err := os.Open("/proc/1/cgroup")
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
return parseCgroupFile(subsystem, f)
|
|
|
|
}
|
|
|
|
|
2014-02-20 18:48:48 -05:00
|
|
|
func (c *Cgroup) Path(root, subsystem string) (string, error) {
|
|
|
|
cgroup := c.Name
|
|
|
|
if c.Parent != "" {
|
|
|
|
cgroup = filepath.Join(c.Parent, cgroup)
|
|
|
|
}
|
|
|
|
initPath, err := GetInitCgroupDir(subsystem)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
return filepath.Join(root, subsystem, initPath, cgroup), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *Cgroup) Join(root, subsystem string, pid int) (string, error) {
|
|
|
|
path, err := c.Path(root, subsystem)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
return path, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *Cgroup) Cleanup(root string) error {
|
|
|
|
get := func(subsystem string) string {
|
|
|
|
path, _ := c.Path(root, subsystem)
|
|
|
|
return path
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, path := range []string{
|
|
|
|
get("memory"),
|
|
|
|
get("devices"),
|
|
|
|
get("cpu"),
|
|
|
|
} {
|
|
|
|
os.RemoveAll(path)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2013-12-21 11:02:06 -05:00
|
|
|
// parseCgroupFile scans r line by line, treating each line as
// "hierarchy-ID:controller-list:cgroup-path" (the layout of /proc/<pid>/cgroup),
// and returns the cgroup path of the first line whose comma-separated
// controller list contains subsystem.
//
// Lines with fewer than three colon-separated fields are skipped. A read
// error reported by the scanner is returned as is; if the input is exhausted
// without a match, a "not found" error is returned.
func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
	s := bufio.NewScanner(r)
	for s.Scan() {
		// Split into at most three fields so that a cgroup path which itself
		// contains ':' is returned intact.
		parts := strings.SplitN(s.Text(), ":", 3)
		if len(parts) < 3 {
			// Malformed line: skip it instead of indexing out of range.
			continue
		}
		for _, subs := range strings.Split(parts[1], ",") {
			if subs == subsystem {
				return parts[2], nil
			}
		}
	}
	// Scan returns false both at EOF and on a read failure; Err tells the two
	// apart, so it has to be checked after the loop.
	if err := s.Err(); err != nil {
		return "", err
	}
	return "", fmt.Errorf("cgroup '%s' not found in /proc/self/cgroup", subsystem)
}
|
2014-02-20 18:48:48 -05:00
|
|
|
|
|
|
|
// writeFile writes data to the file named file inside dir using
// ioutil.WriteFile, passing 0700 as the permission argument.
func writeFile(dir, file, data string) error {
	target := filepath.Join(dir, file)
	payload := []byte(data)
	return ioutil.WriteFile(target, payload, 0700)
}
|
2014-02-20 19:11:22 -05:00
|
|
|
|
|
|
|
func (c *Cgroup) Apply(pid int) error {
|
|
|
|
// We have two implementation of cgroups support, one is based on
|
|
|
|
// systemd and the dbus api, and one is based on raw cgroup fs operations
|
|
|
|
// following the pre-single-writer model docs at:
|
|
|
|
// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/
|
|
|
|
//
|
|
|
|
// we can pick any subsystem to find the root
|
2014-02-24 13:46:20 -05:00
|
|
|
cgroupRoot, err := FindCgroupMountpoint("cpu")
|
2014-02-20 19:11:22 -05:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
cgroupRoot = filepath.Dir(cgroupRoot)
|
|
|
|
|
|
|
|
if _, err := os.Stat(cgroupRoot); err != nil {
|
|
|
|
return fmt.Errorf("cgroups fs not found")
|
|
|
|
}
|
|
|
|
if err := c.setupDevices(cgroupRoot, pid); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := c.setupMemory(cgroupRoot, pid); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := c.setupCpu(cgroupRoot, pid); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *Cgroup) setupDevices(cgroupRoot string, pid int) (err error) {
|
|
|
|
if !c.DeviceAccess {
|
|
|
|
dir, err := c.Join(cgroupRoot, "devices", pid)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
if err != nil {
|
|
|
|
os.RemoveAll(dir)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
if err := writeFile(dir, "devices.deny", "a"); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
allow := []string{
|
|
|
|
// /dev/null, zero, full
|
|
|
|
"c 1:3 rwm",
|
|
|
|
"c 1:5 rwm",
|
|
|
|
"c 1:7 rwm",
|
|
|
|
|
|
|
|
// consoles
|
|
|
|
"c 5:1 rwm",
|
|
|
|
"c 5:0 rwm",
|
|
|
|
"c 4:0 rwm",
|
|
|
|
"c 4:1 rwm",
|
|
|
|
|
|
|
|
// /dev/urandom,/dev/random
|
|
|
|
"c 1:9 rwm",
|
|
|
|
"c 1:8 rwm",
|
|
|
|
|
|
|
|
// /dev/pts/ - pts namespaces are "coming soon"
|
|
|
|
"c 136:* rwm",
|
|
|
|
"c 5:2 rwm",
|
|
|
|
|
|
|
|
// tuntap
|
|
|
|
"c 10:200 rwm",
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, val := range allow {
|
|
|
|
if err := writeFile(dir, "devices.allow", val); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *Cgroup) setupMemory(cgroupRoot string, pid int) (err error) {
|
|
|
|
if c.Memory != 0 || c.MemorySwap != 0 {
|
|
|
|
dir, err := c.Join(cgroupRoot, "memory", pid)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if err != nil {
|
|
|
|
os.RemoveAll(dir)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
if c.Memory != 0 {
|
|
|
|
if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2014-02-25 22:45:57 -05:00
|
|
|
// By default, MemorySwap is set to twice the size of RAM.
|
|
|
|
// If you want to omit MemorySwap, set it to `-1'.
|
|
|
|
if c.MemorySwap != -1 {
|
|
|
|
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Memory*2, 10)); err != nil {
|
2014-02-20 19:11:22 -05:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *Cgroup) setupCpu(cgroupRoot string, pid int) (err error) {
|
|
|
|
// We always want to join the cpu group, to allow fair cpu scheduling
|
|
|
|
// on a container basis
|
|
|
|
dir, err := c.Join(cgroupRoot, "cpu", pid)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if c.CpuShares != 0 {
|
|
|
|
if err := writeFile(dir, "cpu.shares", strconv.FormatInt(c.CpuShares, 10)); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|