mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
386b06eacd
Relevant changes: - containerd/containerd#51 Fix empty device type - containerd/containerd#52 Remove call to unitName - Calling unitName incorrectly appends -slice onto the end of the slice cgroup we are looking for - addresses containerd/containerd#47 cgroups: cgroup deleted - containerd/containerd#53 systemd-239+ no longer allows delegate slice - containerd/containerd#54 Bugfix: can't write to cpuset cgroup - containerd/containerd#63 Makes Load function more lenient on subsystems' checking - addresses containerd/containerd#58 Very strict checking of subsystems' existence while loading cgroup - containerd/containerd#67 Add functionality for retrieving all tasks of a cgroup - containerd/containerd#68 Fix net_prio typo - containerd/containerd#69 Blkio weight/leafWeight pointer value - containerd/containerd#77 Check for non-active/supported cgroups - addresses containerd/containerd#76 unable to find * in controller set: unknown - addresses docker/for-linux#545 Raspbian: Error response from daemon: unable to find "net_prio" in controller set: unknown - addresses docker/for-linux#552 Error response from daemon: unable to find "cpuacct" in controller set: unknown - addresses docker/for-linux#545 Raspbian: Error response from daemon: unable to find "net_prio" in controller set: unknown Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
324 lines
7.2 KiB
Go
324 lines
7.2 KiB
Go
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package cgroups
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
units "github.com/docker/go-units"
|
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
|
)
|
|
|
|
// isUserNS holds the result of runningInUserNS, evaluated once when the
// package is initialized.
var isUserNS = runningInUserNS()
|
|
|
|
// runningInUserNS reports whether the current process appears to be running
// inside a user namespace, judged from the first line of /proc/self/uid_map.
// It returns false when that file cannot be opened or its first line cannot
// be read.
// Copied from github.com/lxc/lxd/shared/util.go
func runningInUserNS() bool {
	uidMap, err := os.Open("/proc/self/uid_map")
	if err != nil {
		// The kernel only provides this file when user namespaces are
		// supported.
		return false
	}
	defer uidMap.Close()

	first, _, err := bufio.NewReader(uidMap).ReadLine()
	if err != nil {
		return false
	}

	var idBase, mappedBase, rangeLen int64
	// The scan result is deliberately ignored: a line that does not parse
	// leaves the three values at zero, which is then treated as "inside a
	// user namespace" by the comparison below.
	_, _ = fmt.Sscanf(string(first), "%d %d %d", &idBase, &mappedBase, &rangeLen)

	// We assume we are in the initial user namespace if we have a full
	// range - 4294967295 uids starting at uid 0.
	inInitialNS := idBase == 0 && mappedBase == 0 && rangeLen == 4294967295
	return !inInitialNS
}
|
|
|
|
// defaults returns all known groups
|
|
func defaults(root string) ([]Subsystem, error) {
|
|
h, err := NewHugetlb(root)
|
|
if err != nil && !os.IsNotExist(err) {
|
|
return nil, err
|
|
}
|
|
s := []Subsystem{
|
|
NewNamed(root, "systemd"),
|
|
NewFreezer(root),
|
|
NewPids(root),
|
|
NewNetCls(root),
|
|
NewNetPrio(root),
|
|
NewPerfEvent(root),
|
|
NewCputset(root),
|
|
NewCpu(root),
|
|
NewCpuacct(root),
|
|
NewMemory(root),
|
|
NewBlkio(root),
|
|
NewRdma(root),
|
|
}
|
|
// only add the devices cgroup if we are not in a user namespace
|
|
// because modifications are not allowed
|
|
if !isUserNS {
|
|
s = append(s, NewDevices(root))
|
|
}
|
|
// add the hugetlb cgroup if error wasn't due to missing hugetlb
|
|
// cgroup support on the host
|
|
if err == nil {
|
|
s = append(s, h)
|
|
}
|
|
return s, nil
|
|
}
|
|
|
|
// remove deletes the cgroup path with os.RemoveAll, retrying on failure.
//
// Up to five attempts are made. Before every attempt after the first the
// function sleeps, starting at 10ms and doubling each time. Any failure is
// retried; the cases of interest are transient errors such as EAGAIN and
// EBUSY. If every attempt fails, the returned error names the path and
// includes the error from the final attempt so callers can see why the
// removal did not succeed.
func remove(path string) error {
	const maxAttempts = 5

	var (
		delay = 10 * time.Millisecond
		err   error
	)
	for attempt := 0; attempt < maxAttempts; attempt++ {
		if attempt != 0 {
			time.Sleep(delay)
			delay *= 2
		}
		if err = os.RemoveAll(path); err == nil {
			return nil
		}
	}
	// Keep the historical message prefix and append the underlying cause.
	return fmt.Errorf("cgroups: unable to remove path %q: %v", path, err)
}
|
|
|
|
// readPids will read all the pids of processes in a cgroup by the provided path
|
|
func readPids(path string, subsystem Name) ([]Process, error) {
|
|
f, err := os.Open(filepath.Join(path, cgroupProcs))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer f.Close()
|
|
var (
|
|
out []Process
|
|
s = bufio.NewScanner(f)
|
|
)
|
|
for s.Scan() {
|
|
if t := s.Text(); t != "" {
|
|
pid, err := strconv.Atoi(t)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
out = append(out, Process{
|
|
Pid: pid,
|
|
Subsystem: subsystem,
|
|
Path: path,
|
|
})
|
|
}
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// readTasksPids will read all the pids of tasks in a cgroup by the provided path
|
|
func readTasksPids(path string, subsystem Name) ([]Task, error) {
|
|
f, err := os.Open(filepath.Join(path, cgroupTasks))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer f.Close()
|
|
var (
|
|
out []Task
|
|
s = bufio.NewScanner(f)
|
|
)
|
|
for s.Scan() {
|
|
if t := s.Text(); t != "" {
|
|
pid, err := strconv.Atoi(t)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
out = append(out, Task{
|
|
Pid: pid,
|
|
Subsystem: subsystem,
|
|
Path: path,
|
|
})
|
|
}
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
func hugePageSizes() ([]string, error) {
|
|
var (
|
|
pageSizes []string
|
|
sizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"}
|
|
)
|
|
files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, st := range files {
|
|
nameArray := strings.Split(st.Name(), "-")
|
|
pageSize, err := units.RAMInBytes(nameArray[1])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
pageSizes = append(pageSizes, units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList))
|
|
}
|
|
return pageSizes, nil
|
|
}
|
|
|
|
func readUint(path string) (uint64, error) {
|
|
v, err := ioutil.ReadFile(path)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return parseUint(strings.TrimSpace(string(v)), 10, 64)
|
|
}
|
|
|
|
// parseUint parses s as an unsigned integer in the given base and bit size,
// clamping negative input to zero instead of failing.
//
// When strconv.ParseUint rejects s, the string is re-parsed as a signed
// integer. If that yields a negative value — either exactly, or as the
// clamped minimum accompanied by a range error — the result is (0, nil).
// In every other failure case the original ParseUint error is returned.
func parseUint(s string, base, bitSize int) (uint64, error) {
	parsed, uintErr := strconv.ParseUint(s, base, bitSize)
	if uintErr == nil {
		return parsed, nil
	}

	signed, signedErr := strconv.ParseInt(s, base, bitSize)
	if signed >= 0 {
		// Not a negative number: the unsigned parse failure stands.
		return 0, uintErr
	}

	switch {
	case signedErr == nil:
		// A negative value that fits in the signed range.
		return 0, nil
	case signedErr.(*strconv.NumError).Err == strconv.ErrRange:
		// A negative value below the signed minimum.
		return 0, nil
	}
	return 0, uintErr
}
|
|
|
|
func parseKV(raw string) (string, uint64, error) {
|
|
parts := strings.Fields(raw)
|
|
switch len(parts) {
|
|
case 2:
|
|
v, err := parseUint(parts[1], 10, 64)
|
|
if err != nil {
|
|
return "", 0, err
|
|
}
|
|
return parts[0], v, nil
|
|
default:
|
|
return "", 0, ErrInvalidFormat
|
|
}
|
|
}
|
|
|
|
func parseCgroupFile(path string) (map[string]string, error) {
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer f.Close()
|
|
return parseCgroupFromReader(f)
|
|
}
|
|
|
|
// parseCgroupFromReader parses colon-separated cgroup entries, one per line,
// from r and returns a map from subsystem name to path.
//
// Each line is split into at most three parts on ":". The second part is a
// comma-separated list of subsystem names and the third part is the path
// recorded for each of them; empty subsystem names are ignored. A line with
// fewer than three parts is reported as an invalid entry. An error from the
// underlying reader is returned instead of a partial map.
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
	var (
		cgroups = make(map[string]string)
		s       = bufio.NewScanner(r)
	)
	for s.Scan() {
		var (
			text  = s.Text()
			parts = strings.SplitN(text, ":", 3)
		)
		if len(parts) < 3 {
			return nil, fmt.Errorf("invalid cgroup entry: %q", text)
		}
		for _, subs := range strings.Split(parts[1], ",") {
			if subs != "" {
				cgroups[subs] = parts[2]
			}
		}
	}
	// The scanner only exposes a read error once Scan has returned false,
	// so it has to be checked here rather than inside the loop.
	if err := s.Err(); err != nil {
		return nil, err
	}
	return cgroups, nil
}
|
|
|
|
func getCgroupDestination(subsystem string) (string, error) {
|
|
f, err := os.Open("/proc/self/mountinfo")
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer f.Close()
|
|
s := bufio.NewScanner(f)
|
|
for s.Scan() {
|
|
if err := s.Err(); err != nil {
|
|
return "", err
|
|
}
|
|
fields := strings.Fields(s.Text())
|
|
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
|
if opt == subsystem {
|
|
return fields[3], nil
|
|
}
|
|
}
|
|
}
|
|
return "", ErrNoCgroupMountDestination
|
|
}
|
|
|
|
func pathers(subystems []Subsystem) []pather {
|
|
var out []pather
|
|
for _, s := range subystems {
|
|
if p, ok := s.(pather); ok {
|
|
out = append(out, p)
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
func initializeSubsystem(s Subsystem, path Path, resources *specs.LinuxResources) error {
|
|
if c, ok := s.(creator); ok {
|
|
p, err := path(s.Name())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := c.Create(p, resources); err != nil {
|
|
return err
|
|
}
|
|
} else if c, ok := s.(pather); ok {
|
|
p, err := path(s.Name())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// do the default create if the group does not have a custom one
|
|
if err := os.MkdirAll(c.Path(p), defaultDirPerm); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// cleanPath normalizes path with filepath.Clean. An empty path is returned
// unchanged. A relative path is first anchored at the root separator,
// cleaned, and then made relative to the root again, so leading ".."
// elements cannot climb above the starting point.
func cleanPath(path string) string {
	if len(path) == 0 {
		return ""
	}

	cleaned := filepath.Clean(path)
	if filepath.IsAbs(cleaned) {
		return filepath.Clean(cleaned)
	}

	root := string(os.PathSeparator)
	// The Rel error is ignored, as before; on failure the empty result is
	// cleaned to ".".
	rel, _ := filepath.Rel(root, filepath.Clean(root+cleaned))
	return filepath.Clean(rel)
}
|