diff --git a/MAINTAINERS b/MAINTAINERS index d1f4d15491..581953cf8d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1,4 +1,4 @@ -Solomon Hykes (@shykes) +Solomon Hykes (@shykes) Guillaume J. Charmes (@creack) Victor Vieux (@vieux) Michael Crosby (@crosbymichael) diff --git a/archive/diff.go b/archive/diff.go index e20e4b1f02..87e8ac7dc4 100644 --- a/archive/diff.go +++ b/archive/diff.go @@ -68,7 +68,7 @@ func ApplyLayer(dest string, layer ArchiveReader) error { parent := filepath.Dir(hdr.Name) parentPath := filepath.Join(dest, parent) if _, err := os.Lstat(parentPath); err != nil && os.IsNotExist(err) { - err = os.MkdirAll(parentPath, 600) + err = os.MkdirAll(parentPath, 0600) if err != nil { return err } diff --git a/contrib/man/md/docker-attach.md b/contrib/man/md/docker-attach.1.md similarity index 100% rename from contrib/man/md/docker-attach.md rename to contrib/man/md/docker-attach.1.md diff --git a/contrib/man/md/docker-build.md b/contrib/man/md/docker-build.1.md similarity index 100% rename from contrib/man/md/docker-build.md rename to contrib/man/md/docker-build.1.md diff --git a/contrib/man/md/docker-commit.md b/contrib/man/md/docker-commit.1.md similarity index 100% rename from contrib/man/md/docker-commit.md rename to contrib/man/md/docker-commit.1.md diff --git a/contrib/man/md/docker-cp.md b/contrib/man/md/docker-cp.1.md similarity index 100% rename from contrib/man/md/docker-cp.md rename to contrib/man/md/docker-cp.1.md diff --git a/contrib/man/md/docker-diff.md b/contrib/man/md/docker-diff.1.md similarity index 100% rename from contrib/man/md/docker-diff.md rename to contrib/man/md/docker-diff.1.md diff --git a/contrib/man/md/docker-events.md b/contrib/man/md/docker-events.1.md similarity index 100% rename from contrib/man/md/docker-events.md rename to contrib/man/md/docker-events.1.md diff --git a/contrib/man/md/docker-export.md b/contrib/man/md/docker-export.1.md similarity index 100% rename from contrib/man/md/docker-export.md rename to 
contrib/man/md/docker-export.1.md diff --git a/contrib/man/md/docker-history.md b/contrib/man/md/docker-history.1.md similarity index 100% rename from contrib/man/md/docker-history.md rename to contrib/man/md/docker-history.1.md diff --git a/contrib/man/md/docker-images.md b/contrib/man/md/docker-images.1.md similarity index 100% rename from contrib/man/md/docker-images.md rename to contrib/man/md/docker-images.1.md diff --git a/contrib/man/md/docker-import.md b/contrib/man/md/docker-import.1.md similarity index 100% rename from contrib/man/md/docker-import.md rename to contrib/man/md/docker-import.1.md diff --git a/contrib/man/md/docker-info.md b/contrib/man/md/docker-info.1.md similarity index 100% rename from contrib/man/md/docker-info.md rename to contrib/man/md/docker-info.1.md diff --git a/contrib/man/md/docker-inspect.md b/contrib/man/md/docker-inspect.1.md similarity index 100% rename from contrib/man/md/docker-inspect.md rename to contrib/man/md/docker-inspect.1.md diff --git a/contrib/man/md/docker-kill.md b/contrib/man/md/docker-kill.1.md similarity index 100% rename from contrib/man/md/docker-kill.md rename to contrib/man/md/docker-kill.1.md diff --git a/contrib/man/md/docker-load.md b/contrib/man/md/docker-load.1.md similarity index 100% rename from contrib/man/md/docker-load.md rename to contrib/man/md/docker-load.1.md diff --git a/contrib/man/md/docker-login.md b/contrib/man/md/docker-login.1.md similarity index 100% rename from contrib/man/md/docker-login.md rename to contrib/man/md/docker-login.1.md diff --git a/contrib/man/md/docker-logs.md b/contrib/man/md/docker-logs.1.md similarity index 100% rename from contrib/man/md/docker-logs.md rename to contrib/man/md/docker-logs.1.md diff --git a/contrib/man/md/docker-port.md b/contrib/man/md/docker-port.1.md similarity index 100% rename from contrib/man/md/docker-port.md rename to contrib/man/md/docker-port.1.md diff --git a/contrib/man/md/docker-ps.md b/contrib/man/md/docker-ps.1.md similarity index 
100% rename from contrib/man/md/docker-ps.md rename to contrib/man/md/docker-ps.1.md diff --git a/contrib/man/md/docker-pull.md b/contrib/man/md/docker-pull.1.md similarity index 100% rename from contrib/man/md/docker-pull.md rename to contrib/man/md/docker-pull.1.md diff --git a/contrib/man/md/docker-push.md b/contrib/man/md/docker-push.1.md similarity index 100% rename from contrib/man/md/docker-push.md rename to contrib/man/md/docker-push.1.md diff --git a/contrib/man/md/docker-restart.md b/contrib/man/md/docker-restart.1.md similarity index 100% rename from contrib/man/md/docker-restart.md rename to contrib/man/md/docker-restart.1.md diff --git a/contrib/man/md/docker-rm.md b/contrib/man/md/docker-rm.1.md similarity index 100% rename from contrib/man/md/docker-rm.md rename to contrib/man/md/docker-rm.1.md diff --git a/contrib/man/md/docker-rmi.md b/contrib/man/md/docker-rmi.1.md similarity index 100% rename from contrib/man/md/docker-rmi.md rename to contrib/man/md/docker-rmi.1.md diff --git a/contrib/man/md/docker-run.md b/contrib/man/md/docker-run.1.md similarity index 100% rename from contrib/man/md/docker-run.md rename to contrib/man/md/docker-run.1.md diff --git a/contrib/man/md/docker-save.md b/contrib/man/md/docker-save.1.md similarity index 100% rename from contrib/man/md/docker-save.md rename to contrib/man/md/docker-save.1.md diff --git a/contrib/man/md/docker-search.md b/contrib/man/md/docker-search.1.md similarity index 100% rename from contrib/man/md/docker-search.md rename to contrib/man/md/docker-search.1.md diff --git a/contrib/man/md/docker-start.md b/contrib/man/md/docker-start.1.md similarity index 100% rename from contrib/man/md/docker-start.md rename to contrib/man/md/docker-start.1.md diff --git a/contrib/man/md/docker-stop.md b/contrib/man/md/docker-stop.1.md similarity index 100% rename from contrib/man/md/docker-stop.md rename to contrib/man/md/docker-stop.1.md diff --git a/contrib/man/md/docker-tag.md b/contrib/man/md/docker-tag.1.md 
similarity index 100% rename from contrib/man/md/docker-tag.md rename to contrib/man/md/docker-tag.1.md diff --git a/contrib/man/md/docker-top.md b/contrib/man/md/docker-top.1.md similarity index 100% rename from contrib/man/md/docker-top.md rename to contrib/man/md/docker-top.1.md diff --git a/contrib/man/md/docker-wait.md b/contrib/man/md/docker-wait.1.md similarity index 100% rename from contrib/man/md/docker-wait.md rename to contrib/man/md/docker-wait.1.md diff --git a/contrib/man/md/docker.md b/contrib/man/md/docker.1.md similarity index 100% rename from contrib/man/md/docker.md rename to contrib/man/md/docker.1.md diff --git a/contrib/man/md/md2man-all.sh b/contrib/man/md/md2man-all.sh index e482da647a..f33557934c 100755 --- a/contrib/man/md/md2man-all.sh +++ b/contrib/man/md/md2man-all.sh @@ -9,8 +9,14 @@ cd "$(dirname "$(readlink -f "$BASH_SOURCE")")" pwd } -mkdir -p ../man1 - -for FILE in docker*.md; do - pandoc -s -t man "$FILE" -o "../man1/${FILE%.*}.1" +for FILE in *.md; do + base="$(basename "$FILE")" + name="${base%.md}" + num="${name##*.}" + if [ -z "$num" -o "$base" = "$num" ]; then + # skip files that aren't of the format xxxx.N.md (like README.md) + continue + fi + mkdir -p "../man${num}" + pandoc -s -t man "$FILE" -o "../man${num}/${name}" done diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 5c6ea09acc..ef17ce7042 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -7,9 +7,9 @@ import ( "github.com/dotcloud/docker/daemon/execdriver" "github.com/dotcloud/docker/daemon/execdriver/native/configuration" "github.com/dotcloud/docker/daemon/execdriver/native/template" + "github.com/dotcloud/docker/pkg/apparmor" "github.com/dotcloud/docker/pkg/label" "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/apparmor" ) // createContainer populates and configures the container type with the diff --git a/daemon/execdriver/native/driver.go 
b/daemon/execdriver/native/driver.go index 27324c6600..ab82cdcc65 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -3,12 +3,6 @@ package native import ( "encoding/json" "fmt" - "github.com/dotcloud/docker/daemon/execdriver" - "github.com/dotcloud/docker/pkg/cgroups" - "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/apparmor" - "github.com/dotcloud/docker/pkg/libcontainer/nsinit" - "github.com/dotcloud/docker/pkg/system" "io" "io/ioutil" "log" @@ -18,6 +12,13 @@ import ( "strconv" "strings" "syscall" + + "github.com/dotcloud/docker/daemon/execdriver" + "github.com/dotcloud/docker/pkg/apparmor" + "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/nsinit" + "github.com/dotcloud/docker/pkg/system" ) const ( diff --git a/daemon/execdriver/native/template/default_template.go b/daemon/execdriver/native/template/default_template.go index d3c433a317..c354637fcb 100644 --- a/daemon/execdriver/native/template/default_template.go +++ b/daemon/execdriver/native/template/default_template.go @@ -1,9 +1,9 @@ package template import ( + "github.com/dotcloud/docker/pkg/apparmor" "github.com/dotcloud/docker/pkg/cgroups" "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/apparmor" ) // New returns the docker default configuration for libcontainer diff --git a/docs/sources/reference/api/registry_index_spec.md b/docs/sources/reference/api/registry_index_spec.md index 53006cf0b5..aa18a2e3c5 100644 --- a/docs/sources/reference/api/registry_index_spec.md +++ b/docs/sources/reference/api/registry_index_spec.md @@ -644,17 +644,20 @@ You have 3 options: > - X-Docker-Token: Token > signature=123abc,repository=”foo/bar”,access=read > + 2. Provide user credentials only > **Header**: > : Authorization: Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ== > + 3. 
Provide Token > **Header**: > : Authorization: Token > signature=123abc,repository=”foo/bar”,access=read > + ### 6.2 On the Registry The Registry only supports the Token challenge: diff --git a/engine/MAINTAINERS b/engine/MAINTAINERS index 354798f72e..db33365bcd 100644 --- a/engine/MAINTAINERS +++ b/engine/MAINTAINERS @@ -1 +1 @@ -#Solomon Hykes Temporarily unavailable +Solomon Hykes diff --git a/hack/infrastructure/MAINTAINERS b/hack/infrastructure/MAINTAINERS deleted file mode 100644 index bd089c55f4..0000000000 --- a/hack/infrastructure/MAINTAINERS +++ /dev/null @@ -1,2 +0,0 @@ -Ken Cochrane (@kencochrane) -Jerome Petazzoni (@jpetazzo) diff --git a/hack/make.sh b/hack/make.sh index f3264c9ce3..46df398c57 100755 --- a/hack/make.sh +++ b/hack/make.sh @@ -117,6 +117,14 @@ if [ "$(uname -s)" = 'FreeBSD' ]; then LDFLAGS="$LDFLAGS -extld clang" fi +# If sqlite3.h doesn't exist under /usr/include, +# check /usr/local/include also just in case +# (e.g. FreeBSD Ports installs it under the directory) +if [ ! 
-e /usr/include/sqlite3.h ] && [ -e /usr/local/include/sqlite3.h ]; then + export CGO_CFLAGS='-I/usr/local/include' + export CGO_LDFLAGS='-L/usr/local/lib' +fi + HAVE_GO_TEST_COVER= if \ go help testflag | grep -- -cover > /dev/null \ diff --git a/pkg/libcontainer/apparmor/apparmor.go b/pkg/apparmor/apparmor.go similarity index 66% rename from pkg/libcontainer/apparmor/apparmor.go rename to pkg/apparmor/apparmor.go index 5de241dd97..0987398124 100644 --- a/pkg/libcontainer/apparmor/apparmor.go +++ b/pkg/apparmor/apparmor.go @@ -8,12 +8,16 @@ package apparmor import "C" import ( "io/ioutil" + "os" "unsafe" ) func IsEnabled() bool { - buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled") - return err == nil && len(buf) > 1 && buf[0] == 'Y' + if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil { + buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled") + return err == nil && len(buf) > 1 && buf[0] == 'Y' + } + return false } func ApplyProfile(pid int, name string) error { diff --git a/pkg/libcontainer/apparmor/apparmor_disabled.go b/pkg/apparmor/apparmor_disabled.go similarity index 100% rename from pkg/libcontainer/apparmor/apparmor_disabled.go rename to pkg/apparmor/apparmor_disabled.go diff --git a/pkg/apparmor/gen.go b/pkg/apparmor/gen.go new file mode 100644 index 0000000000..825e646d92 --- /dev/null +++ b/pkg/apparmor/gen.go @@ -0,0 +1,94 @@ +package apparmor + +import ( + "io" + "os" + "text/template" +) + +type data struct { + Name string + Imports []string + InnerImports []string +} + +const baseTemplate = ` +{{range $value := .Imports}} +{{$value}} +{{end}} + +profile {{.Name}} flags=(attach_disconnected,mediate_deleted) { +{{range $value := .InnerImports}} + {{$value}} +{{end}} + + network, + capability, + file, + umount, + + mount fstype=tmpfs, + mount fstype=mqueue, + mount fstype=fuse.*, + mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/, + mount fstype=efivarfs -> /sys/firmware/efi/efivars/, + mount 
fstype=fusectl -> /sys/fs/fuse/connections/, + mount fstype=securityfs -> /sys/kernel/security/, + mount fstype=debugfs -> /sys/kernel/debug/, + mount fstype=proc -> /proc/, + mount fstype=sysfs -> /sys/, + + deny @{PROC}/sys/fs/** wklx, + deny @{PROC}/sysrq-trigger rwklx, + deny @{PROC}/mem rwklx, + deny @{PROC}/kmem rwklx, + deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx, + deny @{PROC}/sys/kernel/*/** wklx, + + deny mount options=(ro, remount) -> /, + deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/, + deny mount fstype=devpts, + + deny /sys/[^f]*/** wklx, + deny /sys/f[^s]*/** wklx, + deny /sys/fs/[^c]*/** wklx, + deny /sys/fs/c[^g]*/** wklx, + deny /sys/fs/cg[^r]*/** wklx, + deny /sys/firmware/efi/efivars/** rwklx, + deny /sys/kernel/security/** rwklx, +} +` + +func generateProfile(out io.Writer) error { + compiled, err := template.New("apparmor_profile").Parse(baseTemplate) + if err != nil { + return err + } + data := &data{ + Name: "docker-default", + } + if tuntablesExists() { + data.Imports = append(data.Imports, "#include <tunables/global>") + } else { + data.Imports = append(data.Imports, "@{PROC}=/proc/") + } + if abstrctionsEsists() { + data.InnerImports = append(data.InnerImports, "#include <abstractions/base>") + } + if err := compiled.Execute(out, data); err != nil { + return err + } + return nil +} + +// check if the tunables/global exist +func tuntablesExists() bool { + _, err := os.Stat("/etc/apparmor.d/tunables/global") + return err == nil +} + +// check if abstractions/base exist +func abstrctionsEsists() bool { + _, err := os.Stat("/etc/apparmor.d/abstractions/base") + return err == nil +} diff --git a/pkg/apparmor/setup.go b/pkg/apparmor/setup.go new file mode 100644 index 0000000000..2401f63414 --- /dev/null +++ b/pkg/apparmor/setup.go @@ -0,0 +1,83 @@ +package apparmor + +import ( + "fmt" + "io" + "os" + "os/exec" + "path" +) + +const ( + DefaultProfilePath = "/etc/apparmor.d/docker" +) + +func InstallDefaultProfile(backupPath string) error { + if !IsEnabled() { + return 
nil + } + + // If the profile already exists, check if we already have a backup + // if not, do the backup and override it. (docker 0.10 upgrade changed the apparmor profile) + // see gh#5049, apparmor blocks signals in ubuntu 14.04 + if _, err := os.Stat(DefaultProfilePath); err == nil { + if _, err := os.Stat(backupPath); err == nil { + // If both the profile and the backup are present, do nothing + return nil + } + // Make sure the directory exists + if err := os.MkdirAll(path.Dir(backupPath), 0755); err != nil { + return err + } + + // Create the backup file + f, err := os.Create(backupPath) + if err != nil { + return err + } + defer f.Close() + + src, err := os.Open(DefaultProfilePath) + if err != nil { + return err + } + defer src.Close() + + if _, err := io.Copy(f, src); err != nil { + return err + } + } + + // Make sure /etc/apparmor.d exists + if err := os.MkdirAll(path.Dir(DefaultProfilePath), 0755); err != nil { + return err + } + + f, err := os.OpenFile(DefaultProfilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return err + } + if err := generateProfile(f); err != nil { + f.Close() + return err + } + f.Close() + + cmd := exec.Command("/sbin/apparmor_parser", "-r", "-W", "docker") + // to use the parser directly we have to make sure we are in the correct + // dir with the profile + cmd.Dir = "/etc/apparmor.d" + + output, err := cmd.CombinedOutput() + if err != nil && !os.IsNotExist(err) { + if e, ok := err.(*exec.Error); ok { + // keeping with the current profile load code, if the parser does not + // exist then just return + if e.Err == exec.ErrNotFound || os.IsNotExist(e.Err) { + return nil + } + } + return fmt.Errorf("Error loading docker profile: %s (%s)", err, output) + } + return nil +} diff --git a/pkg/cgroups/apply_nosystemd.go b/pkg/cgroups/apply_nosystemd.go deleted file mode 100644 index f94d475907..0000000000 --- a/pkg/cgroups/apply_nosystemd.go +++ /dev/null @@ -1,15 +0,0 @@ -// +build !linux - -package cgroups - 
-import ( - "fmt" -) - -func useSystemd() bool { - return false -} - -func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) { - return nil, fmt.Errorf("Systemd not supported") -} diff --git a/pkg/cgroups/apply_raw.go b/pkg/cgroups/apply_raw.go deleted file mode 100644 index 471d3fcf53..0000000000 --- a/pkg/cgroups/apply_raw.go +++ /dev/null @@ -1,256 +0,0 @@ -package cgroups - -import ( - "fmt" - "os" - "path/filepath" - "strconv" -) - -type rawCgroup struct { - root string - cgroup string -} - -func rawApply(c *Cgroup, pid int) (ActiveCgroup, error) { - // We have two implementation of cgroups support, one is based on - // systemd and the dbus api, and one is based on raw cgroup fs operations - // following the pre-single-writer model docs at: - // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ - // - // we can pick any subsystem to find the root - - cgroupRoot, err := FindCgroupMountpoint("cpu") - if err != nil { - return nil, err - } - cgroupRoot = filepath.Dir(cgroupRoot) - - if _, err := os.Stat(cgroupRoot); err != nil { - return nil, fmt.Errorf("cgroups fs not found") - } - - cgroup := c.Name - if c.Parent != "" { - cgroup = filepath.Join(c.Parent, cgroup) - } - - raw := &rawCgroup{ - root: cgroupRoot, - cgroup: cgroup, - } - for _, g := range []func(*Cgroup, int) error{ - raw.setupDevices, - raw.setupMemory, - raw.setupCpu, - raw.setupCpuset, - raw.setupCpuacct, - raw.setupBlkio, - raw.setupPerfevent, - raw.setupFreezer, - } { - if err := g(c, pid); err != nil { - return nil, err - } - } - - return raw, nil -} - -func (raw *rawCgroup) path(subsystem string) (string, error) { - initPath, err := GetInitCgroupDir(subsystem) - if err != nil { - return "", err - } - return filepath.Join(raw.root, subsystem, initPath, raw.cgroup), nil -} - -func (raw *rawCgroup) join(subsystem string, pid int) (string, error) { - path, err := raw.path(subsystem) - if err != nil { - return "", err - } - if err := os.MkdirAll(path, 0755); err != nil && 
!os.IsExist(err) { - return "", err - } - if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil { - return "", err - } - return path, nil -} - -func (raw *rawCgroup) setupDevices(c *Cgroup, pid int) (err error) { - dir, err := raw.join("devices", pid) - if err != nil { - return err - } - defer func() { - if err != nil { - os.RemoveAll(dir) - } - }() - - if !c.DeviceAccess { - - if err := writeFile(dir, "devices.deny", "a"); err != nil { - return err - } - - allow := []string{ - // allow mknod for any device - "c *:* m", - "b *:* m", - - // /dev/null, zero, full - "c 1:3 rwm", - "c 1:5 rwm", - "c 1:7 rwm", - - // consoles - "c 5:1 rwm", - "c 5:0 rwm", - "c 4:0 rwm", - "c 4:1 rwm", - - // /dev/urandom,/dev/random - "c 1:9 rwm", - "c 1:8 rwm", - - // /dev/pts/ - pts namespaces are "coming soon" - "c 136:* rwm", - "c 5:2 rwm", - - // tuntap - "c 10:200 rwm", - } - - for _, val := range allow { - if err := writeFile(dir, "devices.allow", val); err != nil { - return err - } - } - } - return nil -} - -func (raw *rawCgroup) setupMemory(c *Cgroup, pid int) (err error) { - dir, err := raw.join("memory", pid) - // only return an error for memory if it was not specified - if err != nil && (c.Memory != 0 || c.MemorySwap != 0) { - return err - } - defer func() { - if err != nil { - os.RemoveAll(dir) - } - }() - - if c.Memory != 0 || c.MemorySwap != 0 { - if c.Memory != 0 { - if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil { - return err - } - if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil { - return err - } - } - // By default, MemorySwap is set to twice the size of RAM. - // If you want to omit MemorySwap, set it to `-1'. 
- if c.MemorySwap != -1 { - if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Memory*2, 10)); err != nil { - return err - } - } - } - return nil -} - -func (raw *rawCgroup) setupCpu(c *Cgroup, pid int) (err error) { - // We always want to join the cpu group, to allow fair cpu scheduling - // on a container basis - dir, err := raw.join("cpu", pid) - if err != nil { - return err - } - if c.CpuShares != 0 { - if err := writeFile(dir, "cpu.shares", strconv.FormatInt(c.CpuShares, 10)); err != nil { - return err - } - } - return nil -} - -func (raw *rawCgroup) setupCpuset(c *Cgroup, pid int) (err error) { - // we don't want to join this cgroup unless it is specified - if c.CpusetCpus != "" { - dir, err := raw.join("cpuset", pid) - if err != nil && c.CpusetCpus != "" { - return err - } - defer func() { - if err != nil { - os.RemoveAll(dir) - } - }() - - if err := writeFile(dir, "cpuset.cpus", c.CpusetCpus); err != nil { - return err - } - } - return nil -} - -func (raw *rawCgroup) setupCpuacct(c *Cgroup, pid int) error { - // we just want to join this group even though we don't set anything - if _, err := raw.join("cpuacct", pid); err != nil && err != ErrNotFound { - return err - } - return nil -} - -func (raw *rawCgroup) setupBlkio(c *Cgroup, pid int) error { - // we just want to join this group even though we don't set anything - if _, err := raw.join("blkio", pid); err != nil && err != ErrNotFound { - return err - } - return nil -} - -func (raw *rawCgroup) setupPerfevent(c *Cgroup, pid int) error { - // we just want to join this group even though we don't set anything - if _, err := raw.join("perf_event", pid); err != nil && err != ErrNotFound { - return err - } - return nil -} - -func (raw *rawCgroup) setupFreezer(c *Cgroup, pid int) error { - // we just want to join this group even though we don't set anything - if _, err := raw.join("freezer", pid); err != nil && err != ErrNotFound { - return err - } - return nil -} - -func (raw *rawCgroup) 
Cleanup() error { - get := func(subsystem string) string { - path, _ := raw.path(subsystem) - return path - } - - for _, path := range []string{ - get("memory"), - get("devices"), - get("cpu"), - get("cpuset"), - get("cpuacct"), - get("blkio"), - get("perf_event"), - get("freezer"), - } { - if path != "" { - os.RemoveAll(path) - } - } - return nil -} diff --git a/pkg/cgroups/cgroups.go b/pkg/cgroups/cgroups.go index e5e8f82db6..3aac971340 100644 --- a/pkg/cgroups/cgroups.go +++ b/pkg/cgroups/cgroups.go @@ -1,14 +1,7 @@ package cgroups import ( - "bufio" "errors" - "github.com/dotcloud/docker/pkg/mount" - "io" - "io/ioutil" - "os" - "path/filepath" - "strings" ) var ( @@ -31,77 +24,3 @@ type Cgroup struct { type ActiveCgroup interface { Cleanup() error } - -// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt -func FindCgroupMountpoint(subsystem string) (string, error) { - mounts, err := mount.GetMounts() - if err != nil { - return "", err - } - - for _, mount := range mounts { - if mount.Fstype == "cgroup" { - for _, opt := range strings.Split(mount.VfsOpts, ",") { - if opt == subsystem { - return mount.Mountpoint, nil - } - } - } - } - return "", ErrNotFound -} - -// Returns the relative path to the cgroup docker is running in. 
-func GetThisCgroupDir(subsystem string) (string, error) { - f, err := os.Open("/proc/self/cgroup") - if err != nil { - return "", err - } - defer f.Close() - - return parseCgroupFile(subsystem, f) -} - -func GetInitCgroupDir(subsystem string) (string, error) { - f, err := os.Open("/proc/1/cgroup") - if err != nil { - return "", err - } - defer f.Close() - - return parseCgroupFile(subsystem, f) -} - -func parseCgroupFile(subsystem string, r io.Reader) (string, error) { - s := bufio.NewScanner(r) - for s.Scan() { - if err := s.Err(); err != nil { - return "", err - } - text := s.Text() - parts := strings.Split(text, ":") - for _, subs := range strings.Split(parts[1], ",") { - if subs == subsystem { - return parts[2], nil - } - } - } - return "", ErrNotFound -} - -func writeFile(dir, file, data string) error { - return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) -} - -func (c *Cgroup) Apply(pid int) (ActiveCgroup, error) { - // We have two implementation of cgroups support, one is based on - // systemd and the dbus api, and one is based on raw cgroup fs operations - // following the pre-single-writer model docs at: - // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ - - if useSystemd() { - return systemdApply(c, pid) - } else { - return rawApply(c, pid) - } -} diff --git a/pkg/cgroups/fs/apply_raw.go b/pkg/cgroups/fs/apply_raw.go new file mode 100644 index 0000000000..60f318e9ef --- /dev/null +++ b/pkg/cgroups/fs/apply_raw.go @@ -0,0 +1,146 @@ +package fs + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + + "github.com/dotcloud/docker/pkg/cgroups" +) + +var ( + subsystems = map[string]subsystem{ + "devices": &devicesGroup{}, + "memory": &memoryGroup{}, + "cpu": &cpuGroup{}, + "cpuset": &cpusetGroup{}, + "cpuacct": &cpuacctGroup{}, + "blkio": &blkioGroup{}, + "perf_event": &perfEventGroup{}, + "freezer": &freezerGroup{}, + } +) + +type subsystem interface { + Set(*data) error + Remove(*data) error + 
Stats(*data) (map[string]float64, error) +} + +type data struct { + root string + cgroup string + c *cgroups.Cgroup + pid int +} + +func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { + // We have two implementation of cgroups support, one is based on + // systemd and the dbus api, and one is based on raw cgroup fs operations + // following the pre-single-writer model docs at: + // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ + // + // we can pick any subsystem to find the root + + cgroupRoot, err := cgroups.FindCgroupMountpoint("cpu") + if err != nil { + return nil, err + } + cgroupRoot = filepath.Dir(cgroupRoot) + + if _, err := os.Stat(cgroupRoot); err != nil { + return nil, fmt.Errorf("cgroups fs not found") + } + + cgroup := c.Name + if c.Parent != "" { + cgroup = filepath.Join(c.Parent, cgroup) + } + + d := &data{ + root: cgroupRoot, + cgroup: cgroup, + c: c, + pid: pid, + } + for _, sys := range subsystems { + if err := sys.Set(d); err != nil { + return nil, err + } + } + return d, nil +} + +func GetStats(c *cgroups.Cgroup, subsystem string, pid int) (map[string]float64, error) { + cgroupRoot, err := cgroups.FindCgroupMountpoint("cpu") + if err != nil { + return nil, err + } + cgroupRoot = filepath.Dir(cgroupRoot) + + if _, err := os.Stat(cgroupRoot); err != nil { + return nil, fmt.Errorf("cgroups fs not found") + } + + cgroup := c.Name + if c.Parent != "" { + cgroup = filepath.Join(c.Parent, cgroup) + } + + d := &data{ + root: cgroupRoot, + cgroup: cgroup, + c: c, + pid: pid, + } + sys, exists := subsystems[subsystem] + if !exists { + return nil, fmt.Errorf("subsystem %s does not exist", subsystem) + } + return sys.Stats(d) +} + +func (raw *data) path(subsystem string) (string, error) { + initPath, err := cgroups.GetInitCgroupDir(subsystem) + if err != nil { + return "", err + } + return filepath.Join(raw.root, subsystem, initPath, raw.cgroup), nil +} + +func (raw *data) join(subsystem string) (string, error) { + 
path, err := raw.path(subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return "", err + } + if err := writeFile(path, "cgroup.procs", strconv.Itoa(raw.pid)); err != nil { + return "", err + } + return path, nil +} + +func (raw *data) Cleanup() error { + for _, sys := range subsystems { + sys.Remove(raw) + } + return nil +} + +func writeFile(dir, file, data string) error { + return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) +} + +func removePath(p string, err error) error { + if err != nil { + return err + } + if p != "" { + return os.RemoveAll(p) + } + return nil +} diff --git a/pkg/cgroups/fs/blkio.go b/pkg/cgroups/fs/blkio.go new file mode 100644 index 0000000000..79e14fa2dc --- /dev/null +++ b/pkg/cgroups/fs/blkio.go @@ -0,0 +1,121 @@ +package fs + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/dotcloud/docker/pkg/cgroups" +) + +type blkioGroup struct { +} + +func (s *blkioGroup) Set(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("blkio"); err != nil && err != cgroups.ErrNotFound { + return err + } + return nil +} + +func (s *blkioGroup) Remove(d *data) error { + return removePath(d.path("blkio")) +} + +/* +examples: + + blkio.sectors + 8:0 6792 + + blkio.io_service_bytes + 8:0 Read 1282048 + 8:0 Write 2195456 + 8:0 Sync 2195456 + 8:0 Async 1282048 + 8:0 Total 3477504 + Total 3477504 + + blkio.io_serviced + 8:0 Read 124 + 8:0 Write 104 + 8:0 Sync 104 + 8:0 Async 124 + 8:0 Total 228 + Total 228 + + blkio.io_queued + 8:0 Read 0 + 8:0 Write 0 + 8:0 Sync 0 + 8:0 Async 0 + 8:0 Total 0 + Total 0 +*/ +func (s *blkioGroup) Stats(d *data) (map[string]float64, error) { + var ( + paramData = make(map[string]float64) + params = []string{ + "io_service_bytes_recursive", + "io_serviced_recursive", + "io_queued_recursive", + } + ) + + path, err := d.path("blkio") + 
if err != nil { + return nil, err + } + + k, v, err := s.getSectors(path) + if err != nil { + return nil, err + } + paramData[fmt.Sprintf("blkio.sectors_recursive:%s", k)] = v + + for _, param := range params { + f, err := os.Open(filepath.Join(path, fmt.Sprintf("blkio.%s", param))) + if err != nil { + return nil, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + // format: dev type amount + fields := strings.Fields(sc.Text()) + switch len(fields) { + case 3: + v, err := strconv.ParseFloat(fields[2], 64) + if err != nil { + return nil, err + } + paramData[fmt.Sprintf("%s:%s:%s", param, fields[0], fields[1])] = v + case 2: + // this is the total line, skip + default: + return nil, ErrNotValidFormat + } + } + } + return paramData, nil +} + +func (s *blkioGroup) getSectors(path string) (string, float64, error) { + f, err := os.Open(filepath.Join(path, "blkio.sectors_recursive")) + if err != nil { + return "", 0, err + } + defer f.Close() + + data, err := ioutil.ReadAll(f) + if err != nil { + return "", 0, err + } + return getCgroupParamKeyValue(string(data)) +} diff --git a/pkg/cgroups/fs/cpu.go b/pkg/cgroups/fs/cpu.go new file mode 100644 index 0000000000..8eb0c4ff46 --- /dev/null +++ b/pkg/cgroups/fs/cpu.go @@ -0,0 +1,33 @@ +package fs + +import ( + "strconv" +) + +type cpuGroup struct { +} + +func (s *cpuGroup) Set(d *data) error { + // We always want to join the cpu group, to allow fair cpu scheduling + // on a container basis + dir, err := d.join("cpu") + if err != nil { + return err + } + if d.c.CpuShares != 0 { + if err := writeFile(dir, "cpu.shares", strconv.FormatInt(d.c.CpuShares, 10)); err != nil { + return err + } + } + return nil +} + +func (s *cpuGroup) Remove(d *data) error { + return removePath(d.path("cpu")) +} + +func (s *cpuGroup) Stats(d *data) (map[string]float64, error) { + // we can reuse the cpuacct subsystem to get the cpu stats + sys := subsystems["cpuacct"] + return sys.Stats(d) +} diff --git 
a/pkg/cgroups/fs/cpuacct.go b/pkg/cgroups/fs/cpuacct.go new file mode 100644 index 0000000000..4ea2b1f51b --- /dev/null +++ b/pkg/cgroups/fs/cpuacct.go @@ -0,0 +1,139 @@ +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "time" + + "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/system" +) + +var ( + cpuCount = float64(runtime.NumCPU()) + clockTicks = float64(system.GetClockTicks()) +) + +type cpuacctGroup struct { +} + +func (s *cpuacctGroup) Set(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("cpuacct"); err != nil && err != cgroups.ErrNotFound { + return err + } + return nil +} + +func (s *cpuacctGroup) Remove(d *data) error { + return removePath(d.path("cpuacct")) +} + +// Stats samples the cgroup's and the system's cpu usage 100ms apart and stores the scaled ratio under "percentage". +func (s *cpuacctGroup) Stats(d *data) (map[string]float64, error) { + var ( + startCpu, lastCpu, startSystem, lastSystem float64 + percentage float64 + paramData = make(map[string]float64) + ) + path, err := d.path("cpuacct") + if err != nil { + return nil, err + } + if startCpu, err = s.getCpuUsage(d, path); err != nil { + return nil, err + } + if startSystem, err = s.getSystemCpuUsage(d); err != nil { + return nil, err + } + // sample for 100ms + time.Sleep(100 * time.Millisecond) + if lastCpu, err = s.getCpuUsage(d, path); err != nil { + return nil, err + } + if lastSystem, err = s.getSystemCpuUsage(d); err != nil { + return nil, err + } + + var ( + deltaProc = lastCpu - startCpu + deltaSystem = lastSystem - startSystem + ) + if deltaSystem > 0.0 { + percentage = ((deltaProc / deltaSystem) * clockTicks) * cpuCount + } + // NOTE: a percentage over 100% is valid for POSIX because that means the + // processes are using multiple cores + paramData["percentage"] = percentage + return paramData, nil +} + +func (s *cpuacctGroup) getProcStarttime(d *data) (float64, error) { + rawStart, err := system.GetProcessStartTime(d.pid) + if err != nil { + return 0, err + } + return strconv.ParseFloat(rawStart, 
64) +} + +func (s *cpuacctGroup) getSystemCpuUsage(d *data) (float64, error) { + + f, err := os.Open("/proc/stat") + if err != nil { + return 0, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + parts := strings.Fields(sc.Text()) + if len(parts) == 0 { + // blank line: there is no first field to switch on + continue + } + switch parts[0] { + case "cpu": + if len(parts) < 8 { + return 0, fmt.Errorf("invalid number of cpu fields") + } + + var total float64 + for _, i := range parts[1:8] { + v, err := strconv.ParseFloat(i, 64) + if err != nil { + return 0.0, fmt.Errorf("Unable to convert value %s to float: %s", i, err) + } + total += v + } + return total, nil + default: + continue + } + } + return 0, fmt.Errorf("invalid stat format") +} + +func (s *cpuacctGroup) getCpuUsage(d *data, path string) (float64, error) { + cpuTotal := 0.0 + f, err := os.Open(filepath.Join(path, "cpuacct.stat")) + if err != nil { + return 0.0, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + _, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return 0.0, err + } + // set the raw data in map + cpuTotal += v + } + return cpuTotal, nil +} diff --git a/pkg/cgroups/fs/cpuset.go b/pkg/cgroups/fs/cpuset.go new file mode 100644 index 0000000000..8a13c56cea --- /dev/null +++ b/pkg/cgroups/fs/cpuset.go @@ -0,0 +1,37 @@ +package fs + +import ( + "os" +) + +type cpusetGroup struct { +} + +func (s *cpusetGroup) Set(d *data) error { + // we don't want to join this cgroup unless it is specified + if d.c.CpusetCpus != "" { + dir, err := d.join("cpuset") + if err != nil { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + // assign to the outer err (no :=) so the deferred cleanup fires on failure + if err = writeFile(dir, "cpuset.cpus", d.c.CpusetCpus); err != nil { + return err + } + } + return nil +} + +func (s *cpusetGroup) Remove(d *data) error { + return removePath(d.path("cpuset")) +} + +func (s *cpusetGroup) Stats(d *data) (map[string]float64, error) { + return nil, ErrNotSupportStat +} diff --git a/pkg/cgroups/fs/devices.go 
b/pkg/cgroups/fs/devices.go new file mode 100644 index 0000000000..a2f91eda14 --- /dev/null +++ b/pkg/cgroups/fs/devices.go @@ -0,0 +1,70 @@ +package fs + +import ( + "os" +) + +type devicesGroup struct { +} + +func (s *devicesGroup) Set(d *data) error { + dir, err := d.join("devices") + if err != nil { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if !d.c.DeviceAccess { + // assign to the outer err (no :=) so the deferred cleanup fires on failure + if err = writeFile(dir, "devices.deny", "a"); err != nil { + return err + } + + allow := []string{ + // allow mknod for any device + "c *:* m", + "b *:* m", + + // /dev/null, zero, full + "c 1:3 rwm", + "c 1:5 rwm", + "c 1:7 rwm", + + // consoles + "c 5:1 rwm", + "c 5:0 rwm", + "c 4:0 rwm", + "c 4:1 rwm", + + // /dev/urandom,/dev/random + "c 1:9 rwm", + "c 1:8 rwm", + + // /dev/pts/ - pts namespaces are "coming soon" + "c 136:* rwm", + "c 5:2 rwm", + + // tuntap + "c 10:200 rwm", + } + + for _, val := range allow { + if err = writeFile(dir, "devices.allow", val); err != nil { + return err + } + } + } + return nil +} + +func (s *devicesGroup) Remove(d *data) error { + return removePath(d.path("devices")) +} + +func (s *devicesGroup) Stats(d *data) (map[string]float64, error) { + return nil, ErrNotSupportStat +} diff --git a/pkg/cgroups/fs/freezer.go b/pkg/cgroups/fs/freezer.go new file mode 100644 index 0000000000..ebf5bb9672 --- /dev/null +++ b/pkg/cgroups/fs/freezer.go @@ -0,0 +1,63 @@ +package fs + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/cgroups" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" +) + +type freezerGroup struct { +} + +func (s *freezerGroup) Set(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("freezer"); err != nil && err != cgroups.ErrNotFound { + return err + } + return nil +} + +func (s *freezerGroup) Remove(d *data) error { + return removePath(d.path("freezer")) +} + +func (s *freezerGroup) Stats(d *data) (map[string]float64, error) { + var ( + 
paramData = make(map[string]float64) + params = []string{ + "parent_freezing", + "self_freezing", + // comment out right now because this is string "state", + } + ) + + path, err := d.path("freezer") + if err != nil { + return nil, err + } + + for _, param := range params { + f, err := os.Open(filepath.Join(path, fmt.Sprintf("freezer.%s", param))) + if err != nil { + return nil, err + } + + data, err := ioutil.ReadAll(f) + // close right away: a defer inside the loop would hold every file open until Stats returns + f.Close() + if err != nil { + return nil, err + } + + v, err := strconv.ParseFloat(strings.TrimSuffix(string(data), "\n"), 64) + if err != nil { + return nil, err + } + paramData[param] = v + } + return paramData, nil +} diff --git a/pkg/cgroups/fs/memory.go b/pkg/cgroups/fs/memory.go new file mode 100644 index 0000000000..cf4bf5ab73 --- /dev/null +++ b/pkg/cgroups/fs/memory.go @@ -0,0 +1,72 @@ +package fs + +import ( + "bufio" + "os" + "path/filepath" + "strconv" +) + +type memoryGroup struct { +} + +func (s *memoryGroup) Set(d *data) error { + dir, err := d.join("memory") + // only return an error for memory if a limit was specified + if err != nil && (d.c.Memory != 0 || d.c.MemorySwap != 0) { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if d.c.Memory != 0 || d.c.MemorySwap != 0 { + // assign to the outer err (no :=) so the deferred cleanup fires on failure + if d.c.Memory != 0 { + if err = writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(d.c.Memory, 10)); err != nil { + return err + } + if err = writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(d.c.Memory, 10)); err != nil { + return err + } + } + // By default, MemorySwap is set to twice the size of RAM. + // If you want to omit MemorySwap, set it to `-1'. 
+ if d.c.MemorySwap != -1 { + if err = writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.Memory*2, 10)); err != nil { + return err + } + } + } + return nil +} + +func (s *memoryGroup) Remove(d *data) error { + return removePath(d.path("memory")) +} + +func (s *memoryGroup) Stats(d *data) (map[string]float64, error) { + paramData := make(map[string]float64) + path, err := d.path("memory") + if err != nil { + return nil, err + } + + f, err := os.Open(filepath.Join(path, "memory.stat")) + if err != nil { + return nil, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return nil, err + } + paramData[t] = v + } + return paramData, nil +} diff --git a/pkg/cgroups/fs/perf_event.go b/pkg/cgroups/fs/perf_event.go new file mode 100644 index 0000000000..789b3e59ad --- /dev/null +++ b/pkg/cgroups/fs/perf_event.go @@ -0,0 +1,24 @@ +package fs + +import ( + "github.com/dotcloud/docker/pkg/cgroups" +) + +type perfEventGroup struct { +} + +func (s *perfEventGroup) Set(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("perf_event"); err != nil && err != cgroups.ErrNotFound { + return err + } + return nil +} + +func (s *perfEventGroup) Remove(d *data) error { + return removePath(d.path("perf_event")) +} + +func (s *perfEventGroup) Stats(d *data) (map[string]float64, error) { + return nil, ErrNotSupportStat +} diff --git a/pkg/cgroups/fs/utils.go b/pkg/cgroups/fs/utils.go new file mode 100644 index 0000000000..f4c4846b8c --- /dev/null +++ b/pkg/cgroups/fs/utils.go @@ -0,0 +1,29 @@ +package fs + +import ( + "errors" + "fmt" + "strconv" + "strings" +) + +var ( + ErrNotSupportStat = errors.New("stats are not supported for subsystem") + ErrNotValidFormat = errors.New("line is not a valid key value format") +) + +// Parses a cgroup param and returns as name, value +// i.e. 
"io_service_bytes 1234" will return as io_service_bytes, 1234 +func getCgroupParamKeyValue(t string) (string, float64, error) { + parts := strings.Fields(t) + switch len(parts) { + case 2: + value, err := strconv.ParseFloat(parts[1], 64) + if err != nil { + return "", 0.0, fmt.Errorf("Unable to convert param value to float: %s", err) + } + return parts[0], value, nil + default: + return "", 0.0, ErrNotValidFormat + } +} diff --git a/pkg/cgroups/systemd/apply_nosystemd.go b/pkg/cgroups/systemd/apply_nosystemd.go new file mode 100644 index 0000000000..226aa59f9d --- /dev/null +++ b/pkg/cgroups/systemd/apply_nosystemd.go @@ -0,0 +1,17 @@ +// +build !linux + +package systemd + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/cgroups" +) + +func UseSystemd() bool { + return false +} + +// Apply has the same signature as the linux build's Apply so callers of systemd.Apply compile on every platform; here it always fails. +func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { + return nil, fmt.Errorf("Systemd not supported") +} diff --git a/pkg/cgroups/apply_systemd.go b/pkg/cgroups/systemd/apply_systemd.go similarity index 86% rename from pkg/cgroups/apply_systemd.go rename to pkg/cgroups/systemd/apply_systemd.go index a9b3a8d301..7c26080d6e 100644 --- a/pkg/cgroups/apply_systemd.go +++ b/pkg/cgroups/systemd/apply_systemd.go @@ -1,27 +1,35 @@ // +build linux -package cgroups +package systemd import ( "fmt" - systemd1 "github.com/coreos/go-systemd/dbus" - "github.com/dotcloud/docker/pkg/systemd" - "github.com/godbus/dbus" + "io/ioutil" "path/filepath" "strings" "sync" + + systemd1 "github.com/coreos/go-systemd/dbus" + "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/systemd" + "github.com/godbus/dbus" ) type systemdCgroup struct { } +type DeviceAllow struct { + Node string + Permissions string +} + var ( connLock sync.Mutex theConn *systemd1.Conn hasStartTransientUnit bool ) -func useSystemd() bool { +func UseSystemd() bool { if !systemd.SdBooted() { return false } @@ -48,15 +56,9 @@ func useSystemd() bool { } } } - return hasStartTransientUnit } -type 
DeviceAllow struct { - Node string - Permissions string -} - func getIfaceForUnit(unitName string) string { if strings.HasSuffix(unitName, ".scope") { return "Scope" @@ -67,11 +69,12 @@ func getIfaceForUnit(unitName string) string { return "Unit" } -func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) { - unitName := c.Parent + "-" + c.Name + ".scope" - slice := "system.slice" - - var properties []systemd1.Property +func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { + var ( + unitName = c.Parent + "-" + c.Name + ".scope" + slice = "system.slice" + properties []systemd1.Property + ) for _, v := range c.UnitProperties { switch v[0] { @@ -85,7 +88,8 @@ func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) { properties = append(properties, systemd1.Property{"Slice", dbus.MakeVariant(slice)}, systemd1.Property{"Description", dbus.MakeVariant("docker container " + c.Name)}, - systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})}) + systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})}, + ) if !c.DeviceAccess { properties = append(properties, @@ -138,7 +142,7 @@ func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) { cgroup := props["ControlGroup"].(string) if !c.DeviceAccess { - mountpoint, err := FindCgroupMountpoint("devices") + mountpoint, err := cgroups.FindCgroupMountpoint("devices") if err != nil { return nil, err } @@ -146,15 +150,14 @@ func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) { path := filepath.Join(mountpoint, cgroup) // /dev/pts/* - if err := writeFile(path, "devices.allow", "c 136:* rwm"); err != nil { + if err := ioutil.WriteFile(filepath.Join(path, "devices.allow"), []byte("c 136:* rwm"), 0700); err != nil { return nil, err } // tuntap - if err := writeFile(path, "devices.allow", "c 10:200 rwm"); err != nil { + if err := ioutil.WriteFile(filepath.Join(path, "devices.allow"), []byte("c 10:200 rwm"), 0700); err != nil { return nil, err } } - return &systemdCgroup{}, nil } diff 
--git a/pkg/cgroups/utils.go b/pkg/cgroups/utils.go new file mode 100644 index 0000000000..02a7f357f6 --- /dev/null +++ b/pkg/cgroups/utils.go @@ -0,0 +1,75 @@ +package cgroups + +import ( + "bufio" + "io" + "os" + "strings" + + "github.com/dotcloud/docker/pkg/mount" +) + +// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt +func FindCgroupMountpoint(subsystem string) (string, error) { + mounts, err := mount.GetMounts() + if err != nil { + return "", err + } + + for _, mount := range mounts { + if mount.Fstype == "cgroup" { + for _, opt := range strings.Split(mount.VfsOpts, ",") { + if opt == subsystem { + return mount.Mountpoint, nil + } + } + } + } + return "", ErrNotFound +} + +// Returns the relative path to the cgroup docker is running in. +func GetThisCgroupDir(subsystem string) (string, error) { + f, err := os.Open("/proc/self/cgroup") + if err != nil { + return "", err + } + defer f.Close() + + return parseCgroupFile(subsystem, f) +} + +// Returns the relative path to the cgroup listed for pid 1 in /proc/1/cgroup. +func GetInitCgroupDir(subsystem string) (string, error) { + f, err := os.Open("/proc/1/cgroup") + if err != nil { + return "", err + } + defer f.Close() + + return parseCgroupFile(subsystem, f) +} + +// parseCgroupFile scans "id:subsystems:path" lines read from r and returns the +// path of the first line whose subsystem list contains subsystem, or ErrNotFound. +func parseCgroupFile(subsystem string, r io.Reader) (string, error) { + s := bufio.NewScanner(r) + for s.Scan() { + // the path is the third field and may itself contain ':' + parts := strings.SplitN(s.Text(), ":", 3) + if len(parts) < 3 { + // malformed entry: skip it instead of indexing out of range + continue + } + for _, subs := range strings.Split(parts[1], ",") { + if subs == subsystem { + return parts[2], nil + } + } + } + // Err only reports a read failure once Scan has returned false + if err := s.Err(); err != nil { + return "", err + } + return "", ErrNotFound +} diff --git a/pkg/graphdb/conn_linux.go b/pkg/graphdb/conn_sqlite3.go similarity index 92% rename from pkg/graphdb/conn_linux.go rename to pkg/graphdb/conn_sqlite3.go index 7a1ab8c92f..5b5f8e6bfc 100644 --- a/pkg/graphdb/conn_linux.go +++ b/pkg/graphdb/conn_sqlite3.go @@ -1,4 +1,4 @@ -// +build amd64 +// +build linux,amd64 freebsd,cgo package graphdb diff --git a/pkg/graphdb/conn_unsupported.go b/pkg/graphdb/conn_unsupported.go index c2d602569f..0a48634336 
100644 --- a/pkg/graphdb/conn_unsupported.go +++ b/pkg/graphdb/conn_unsupported.go @@ -1,4 +1,4 @@ -// +build !linux !amd64 +// +build !linux,!freebsd linux,!amd64 freebsd,!cgo package graphdb diff --git a/pkg/libcontainer/apparmor/setup.go b/pkg/libcontainer/apparmor/setup.go deleted file mode 100644 index 548e72f550..0000000000 --- a/pkg/libcontainer/apparmor/setup.go +++ /dev/null @@ -1,128 +0,0 @@ -package apparmor - -import ( - "fmt" - "io" - "io/ioutil" - "os" - "os/exec" - "path" -) - -const ( - DefaultProfilePath = "/etc/apparmor.d/docker" -) - -const DefaultProfile = ` -# AppArmor profile from lxc for containers. - -#include -profile docker-default flags=(attach_disconnected,mediate_deleted) { - #include - network, - capability, - file, - umount, - - # ignore DENIED message on / remount - deny mount options=(ro, remount) -> /, - - # allow tmpfs mounts everywhere - mount fstype=tmpfs, - - # allow mqueue mounts everywhere - mount fstype=mqueue, - - # allow fuse mounts everywhere - mount fstype=fuse.*, - - # allow bind mount of /lib/init/fstab for lxcguest - mount options=(rw, bind) /lib/init/fstab.lxc/ -> /lib/init/fstab/, - - # deny writes in /proc/sys/fs but allow binfmt_misc to be mounted - mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/, - deny @{PROC}/sys/fs/** wklx, - - # allow efivars to be mounted, writing to it will be blocked though - mount fstype=efivarfs -> /sys/firmware/efi/efivars/, - - # block some other dangerous paths - deny @{PROC}/sysrq-trigger rwklx, - deny @{PROC}/mem rwklx, - deny @{PROC}/kmem rwklx, - deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx, - deny @{PROC}/sys/kernel/*/** wklx, - - # deny writes in /sys except for /sys/fs/cgroup, also allow - # fusectl, securityfs and debugfs to be mounted there (read-only) - mount fstype=fusectl -> /sys/fs/fuse/connections/, - mount fstype=securityfs -> /sys/kernel/security/, - mount fstype=debugfs -> /sys/kernel/debug/, - deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/, - mount 
fstype=proc -> /proc/, - mount fstype=sysfs -> /sys/, - deny /sys/[^f]*/** wklx, - deny /sys/f[^s]*/** wklx, - deny /sys/fs/[^c]*/** wklx, - deny /sys/fs/c[^g]*/** wklx, - deny /sys/fs/cg[^r]*/** wklx, - deny /sys/firmware/efi/efivars/** rwklx, - deny /sys/kernel/security/** rwklx, - mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/, - - # the container may never be allowed to mount devpts. If it does, it - # will remount the host's devpts. We could allow it to do it with - # the newinstance option (but, right now, we don't). - deny mount fstype=devpts, -} -` - -func InstallDefaultProfile(backupPath string) error { - if !IsEnabled() { - return nil - } - - // If the profile already exists, check if we already have a backup - // if not, do the backup and override it. (docker 0.10 upgrade changed the apparmor profile) - // see gh#5049, apparmor blocks signals in ubuntu 14.04 - if _, err := os.Stat(DefaultProfilePath); err == nil { - if _, err := os.Stat(backupPath); err == nil { - // If both the profile and the backup are present, do nothing - return nil - } - // Make sure the directory exists - if err := os.MkdirAll(path.Dir(backupPath), 0755); err != nil { - return err - } - - // Create the backup file - f, err := os.Create(backupPath) - if err != nil { - return err - } - defer f.Close() - src, err := os.Open(DefaultProfilePath) - if err != nil { - return err - } - defer src.Close() - if _, err := io.Copy(f, src); err != nil { - return err - } - } - - // Make sure /etc/apparmor.d exists - if err := os.MkdirAll(path.Dir(DefaultProfilePath), 0755); err != nil { - return err - } - - if err := ioutil.WriteFile(DefaultProfilePath, []byte(DefaultProfile), 0644); err != nil { - return err - } - - output, err := exec.Command("/lib/init/apparmor-profile-load", "docker").CombinedOutput() - if err != nil { - return fmt.Errorf("Error loading docker profile: %s (%s)", err, output) - } - return nil -} diff --git a/pkg/libcontainer/nsinit/exec.go 
b/pkg/libcontainer/nsinit/exec.go index c07c45de3c..e76e060d1c 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -8,6 +8,8 @@ import ( "syscall" "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/cgroups/fs" + "github.com/dotcloud/docker/pkg/cgroups/systemd" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" @@ -99,7 +101,11 @@ func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args [ func (ns *linuxNs) SetupCgroups(container *libcontainer.Container, nspid int) (cgroups.ActiveCgroup, error) { if container.Cgroups != nil { - return container.Cgroups.Apply(nspid) + c := container.Cgroups + if systemd.UseSystemd() { + return systemd.Apply(c, nspid) + } + return fs.Apply(c, nspid) } return nil, nil } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index b6c02eafd5..0e85c0e4be 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -8,9 +8,9 @@ import ( "runtime" "syscall" + "github.com/dotcloud/docker/pkg/apparmor" "github.com/dotcloud/docker/pkg/label" "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/apparmor" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/libcontainer/utils" diff --git a/pkg/system/sysconfig.go b/pkg/system/sysconfig.go new file mode 100644 index 0000000000..dcbe6c9cdd --- /dev/null +++ b/pkg/system/sysconfig.go @@ -0,0 +1,14 @@ +// +build linux,cgo + +package system + +/* +#include <unistd.h> +int get_hz(void) { return sysconf(_SC_CLK_TCK); } +*/ +import "C" + +// GetClockTicks returns the value of sysconf(_SC_CLK_TCK) obtained through cgo. +func GetClockTicks() int { + return int(C.get_hz()) +} diff --git a/pkg/system/sysconfig_nocgo.go b/pkg/system/sysconfig_nocgo.go new file mode 100644 index 0000000000..7ca3488154 --- /dev/null +++ b/pkg/system/sysconfig_nocgo.go @@ 
-0,0 +1,9 @@ +// +build linux,!cgo + +package system + +func GetClockTicks() int { + // when we cannot call out to C to get the sysconf it is fairly safe to + // just return 100 + return 100 +} diff --git a/pkg/system/unsupported.go b/pkg/system/unsupported.go index c52a1e5d00..4ae2a488aa 100644 --- a/pkg/system/unsupported.go +++ b/pkg/system/unsupported.go @@ -17,3 +17,9 @@ func UsetCloseOnExec(fd uintptr) error { func Gettid() int { return 0 } + +func GetClockTicks() int { + // when we cannot call out to C to get the sysconf it is fairly safe to + // just return 100 + return 100 +} diff --git a/server/MAINTAINERS b/server/MAINTAINERS new file mode 100644 index 0000000000..db33365bcd --- /dev/null +++ b/server/MAINTAINERS @@ -0,0 +1 @@ +Solomon Hykes diff --git a/server/server.go b/server/server.go index 72244f4e6b..0ab0a4a00b 100644 --- a/server/server.go +++ b/server/server.go @@ -1,3 +1,24 @@ +// DEPRECATION NOTICE. PLEASE DO NOT ADD ANYTHING TO THIS FILE. +// +// server/server.go is deprecated. We are working on breaking it up into smaller, cleaner +// pieces which will be easier to find and test. This will help make the code less +// redundant and more readable. +// +// Contributors, please don't add anything to server/server.go, unless it has the explicit +// goal of helping the deprecation effort. +// +// Maintainers, please refuse patches which add code to server/server.go. +// +// Instead try the following files: +// * For code related to local image management, try graph/ +// * For code related to image downloading, uploading, remote search etc, try registry/ +// * For code related to the docker daemon, try daemon/ +// * For small utilities which could potentially be useful outside of Docker, try pkg/ +// * For miscellaneous "util" functions which are docker-specific, try encapsulating them +// inside one of the subsystems above. 
If you really think they should be more widely +// available, are you sure you can't remove the docker dependencies and move them to +// pkg? In last resort, you can add them to utils/ (but please try not to). + package server import (