mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Update restrictions for better handling of mounts
This also cleans up some of the leftover restriction-path code from before. Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)
This commit is contained in:
parent
83982e8b1d
commit
f5139233b9
8 changed files with 54 additions and 107 deletions
|
@ -2,12 +2,6 @@ package lxc
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/dotcloud/docker/daemon/execdriver"
|
|
||||||
"github.com/dotcloud/docker/pkg/cgroups"
|
|
||||||
"github.com/dotcloud/docker/pkg/label"
|
|
||||||
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
|
|
||||||
"github.com/dotcloud/docker/pkg/system"
|
|
||||||
"github.com/dotcloud/docker/utils"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
@ -18,6 +12,13 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/dotcloud/docker/daemon/execdriver"
|
||||||
|
"github.com/dotcloud/docker/pkg/cgroups"
|
||||||
|
"github.com/dotcloud/docker/pkg/label"
|
||||||
|
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
|
||||||
|
"github.com/dotcloud/docker/pkg/system"
|
||||||
|
"github.com/dotcloud/docker/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
const DriverName = "lxc"
|
const DriverName = "lxc"
|
||||||
|
@ -27,31 +28,26 @@ func init() {
|
||||||
if err := setupEnv(args); err != nil {
|
if err := setupEnv(args); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := setupHostname(args); err != nil {
|
if err := setupHostname(args); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := setupNetworking(args); err != nil {
|
if err := setupNetworking(args); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if !args.Privileged {
|
||||||
if err := restrict.Restrict("/", "/empty"); err != nil {
|
if err := restrict.Restrict(); err != nil {
|
||||||
return err
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := setupCapabilities(args); err != nil {
|
if err := setupCapabilities(args); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := setupWorkingDirectory(args); err != nil {
|
if err := setupWorkingDirectory(args); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := system.CloseFdsFrom(3); err != nil {
|
if err := system.CloseFdsFrom(3); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := changeUser(args); err != nil {
|
if err := changeUser(args); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -69,10 +65,9 @@ func init() {
|
||||||
}
|
}
|
||||||
|
|
||||||
type driver struct {
|
type driver struct {
|
||||||
root string // root path for the driver to use
|
root string // root path for the driver to use
|
||||||
apparmor bool
|
apparmor bool
|
||||||
sharedRoot bool
|
sharedRoot bool
|
||||||
restrictionPath string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewDriver(root string, apparmor bool) (*driver, error) {
|
func NewDriver(root string, apparmor bool) (*driver, error) {
|
||||||
|
@ -80,15 +75,10 @@ func NewDriver(root string, apparmor bool) (*driver, error) {
|
||||||
if err := linkLxcStart(root); err != nil {
|
if err := linkLxcStart(root); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
restrictionPath := filepath.Join(root, "empty")
|
|
||||||
if err := os.MkdirAll(restrictionPath, 0700); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return &driver{
|
return &driver{
|
||||||
apparmor: apparmor,
|
apparmor: apparmor,
|
||||||
root: root,
|
root: root,
|
||||||
sharedRoot: rootIsShared(),
|
sharedRoot: rootIsShared(),
|
||||||
restrictionPath: restrictionPath,
|
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -419,16 +409,14 @@ func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) {
|
||||||
|
|
||||||
if err := LxcTemplateCompiled.Execute(fo, struct {
|
if err := LxcTemplateCompiled.Execute(fo, struct {
|
||||||
*execdriver.Command
|
*execdriver.Command
|
||||||
AppArmor bool
|
AppArmor bool
|
||||||
ProcessLabel string
|
ProcessLabel string
|
||||||
MountLabel string
|
MountLabel string
|
||||||
RestrictionSource string
|
|
||||||
}{
|
}{
|
||||||
Command: c,
|
Command: c,
|
||||||
AppArmor: d.apparmor,
|
AppArmor: d.apparmor,
|
||||||
ProcessLabel: process,
|
ProcessLabel: process,
|
||||||
MountLabel: mount,
|
MountLabel: mount,
|
||||||
RestrictionSource: d.restrictionPath,
|
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
package lxc
|
package lxc
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/dotcloud/docker/daemon/execdriver"
|
|
||||||
"github.com/dotcloud/docker/pkg/label"
|
|
||||||
"strings"
|
"strings"
|
||||||
"text/template"
|
"text/template"
|
||||||
|
|
||||||
|
"github.com/dotcloud/docker/daemon/execdriver"
|
||||||
|
"github.com/dotcloud/docker/pkg/label"
|
||||||
)
|
)
|
||||||
|
|
||||||
const LxcTemplate = `
|
const LxcTemplate = `
|
||||||
|
@ -110,13 +111,6 @@ lxc.aa_profile = unconfined
|
||||||
{{else}}
|
{{else}}
|
||||||
# Let AppArmor normal confinement take place (i.e., not unconfined)
|
# Let AppArmor normal confinement take place (i.e., not unconfined)
|
||||||
{{end}}
|
{{end}}
|
||||||
{{else}}
|
|
||||||
# Restrict access to some stuff in /proc. Note that /proc is already mounted
|
|
||||||
# read-only, so we don't need to bother about things that are just dangerous
|
|
||||||
# to write to (like sysrq-trigger). Also, recent kernels won't let a container
|
|
||||||
# peek into /proc/kcore, but let's cater for people who might run Docker on
|
|
||||||
# older kernels. Just in case.
|
|
||||||
lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/kcore none bind,ro 0 0
|
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|
||||||
# limits
|
# limits
|
||||||
|
|
|
@ -24,7 +24,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
|
||||||
container.Cgroups.Name = c.ID
|
container.Cgroups.Name = c.ID
|
||||||
// check to see if we are running in ramdisk to disable pivot root
|
// check to see if we are running in ramdisk to disable pivot root
|
||||||
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
|
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
|
||||||
container.Context["restriction_path"] = d.restrictionPath
|
container.Context["restrictions"] = "true"
|
||||||
|
|
||||||
if err := d.createNetwork(container, c); err != nil {
|
if err := d.createNetwork(container, c); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -84,7 +84,7 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error {
|
||||||
}
|
}
|
||||||
container.Cgroups.DeviceAccess = true
|
container.Cgroups.DeviceAccess = true
|
||||||
|
|
||||||
delete(container.Context, "restriction_path")
|
delete(container.Context, "restrictions")
|
||||||
|
|
||||||
if apparmor.IsEnabled() {
|
if apparmor.IsEnabled() {
|
||||||
container.Context["apparmor_profile"] = "unconfined"
|
container.Context["apparmor_profile"] = "unconfined"
|
||||||
|
|
|
@ -57,7 +57,6 @@ type driver struct {
|
||||||
root string
|
root string
|
||||||
initPath string
|
initPath string
|
||||||
activeContainers map[string]*exec.Cmd
|
activeContainers map[string]*exec.Cmd
|
||||||
restrictionPath string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewDriver(root, initPath string) (*driver, error) {
|
func NewDriver(root, initPath string) (*driver, error) {
|
||||||
|
@ -68,14 +67,8 @@ func NewDriver(root, initPath string) (*driver, error) {
|
||||||
if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil {
|
if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
restrictionPath := filepath.Join(root, "empty")
|
|
||||||
if err := os.MkdirAll(restrictionPath, 0700); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return &driver{
|
return &driver{
|
||||||
root: root,
|
root: root,
|
||||||
restrictionPath: restrictionPath,
|
|
||||||
initPath: initPath,
|
initPath: initPath,
|
||||||
activeContainers: make(map[string]*exec.Cmd),
|
activeContainers: make(map[string]*exec.Cmd),
|
||||||
}, nil
|
}, nil
|
||||||
|
|
|
@ -123,15 +123,12 @@ func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mo
|
||||||
systemMounts := []mount{
|
systemMounts := []mount{
|
||||||
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
|
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
|
||||||
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
|
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
|
||||||
|
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
|
||||||
|
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(mounts.OfType("devtmpfs")) == 1 {
|
if len(mounts.OfType("devtmpfs")) == 1 {
|
||||||
systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)})
|
systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)})
|
||||||
}
|
}
|
||||||
systemMounts = append(systemMounts,
|
|
||||||
mount{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
|
|
||||||
mount{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
|
|
||||||
)
|
|
||||||
|
|
||||||
return systemMounts
|
return systemMounts
|
||||||
}
|
}
|
||||||
|
|
|
@ -72,8 +72,8 @@ func Init(container *libcontainer.Container, uncleanRootfs, consolePath string,
|
||||||
|
|
||||||
runtime.LockOSThread()
|
runtime.LockOSThread()
|
||||||
|
|
||||||
if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
|
if container.Context["restrictions"] != "" {
|
||||||
if err := restrict.Restrict("/", restrictionPath); err != nil {
|
if err := restrict.Restrict(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,67 +11,42 @@ import (
|
||||||
"github.com/dotcloud/docker/pkg/system"
|
"github.com/dotcloud/docker/pkg/system"
|
||||||
)
|
)
|
||||||
|
|
||||||
// "restrictions" are container paths (files, directories, whatever) that have to be masked.
|
|
||||||
// maskPath is a "safe" path to be mounted over maskedPath. It can take two special values:
|
|
||||||
// - if it is "", then nothing is mounted;
|
|
||||||
// - if it is "EMPTY", then an empty directory is mounted instead.
|
|
||||||
// If remountRO is true then the maskedPath is remounted read-only (regardless of whether a maskPath was used).
|
|
||||||
type restriction struct {
|
|
||||||
maskedPath string
|
|
||||||
maskPath string
|
|
||||||
remountRO bool
|
|
||||||
}
|
|
||||||
|
|
||||||
var restrictions = []restriction{
|
|
||||||
{"/proc", "", true},
|
|
||||||
{"/sys", "", true},
|
|
||||||
{"/proc/kcore", "/dev/null", false},
|
|
||||||
}
|
|
||||||
|
|
||||||
// This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts).
|
// This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts).
|
||||||
// However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes).
|
// However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes).
|
||||||
// "empty" should be the path to an empty directory.
|
func Restrict() error {
|
||||||
func Restrict(rootfs, empty string) error {
|
// remount proc and sys as readonly
|
||||||
for _, restriction := range restrictions {
|
for _, dest := range []string{"proc", "sys"} {
|
||||||
dest := filepath.Join(rootfs, restriction.maskedPath)
|
if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
|
||||||
if restriction.maskPath != "" {
|
return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
|
||||||
var source string
|
|
||||||
if restriction.maskPath == "EMPTY" {
|
|
||||||
source = empty
|
|
||||||
} else {
|
|
||||||
source = filepath.Join(rootfs, restriction.maskPath)
|
|
||||||
}
|
|
||||||
if err := system.Mount(source, dest, "", syscall.MS_BIND, ""); err != nil {
|
|
||||||
return fmt.Errorf("unable to bind-mount %s over %s: %s", source, dest, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if restriction.remountRO {
|
|
||||||
if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
|
|
||||||
return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := system.Mount("/proc/kcore", "/dev/null", "", syscall.MS_BIND, ""); err != nil {
|
||||||
|
return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore")
|
||||||
|
}
|
||||||
|
|
||||||
// This weird trick will allow us to mount /proc read-only, while being able to use AppArmor.
|
// This weird trick will allow us to mount /proc read-only, while being able to use AppArmor.
|
||||||
// This is because apparently, loading an AppArmor profile requires write access to /proc/1/attr.
|
// This is because apparently, loading an AppArmor profile requires write access to /proc/1/attr.
|
||||||
// So we do another mount of procfs, ensure it's write-able, and bind-mount a subset of it.
|
// So we do another mount of procfs, ensure it's write-able, and bind-mount a subset of it.
|
||||||
tmpProcPath := filepath.Join(rootfs, ".proc")
|
var (
|
||||||
if err := os.Mkdir(tmpProcPath, 0700); err != nil {
|
rwAttrPath = filepath.Join(".proc", "1", "attr")
|
||||||
return fmt.Errorf("unable to create temporary proc mountpoint %s: %s", tmpProcPath, err)
|
roAttrPath = filepath.Join("proc", "1", "attr")
|
||||||
|
)
|
||||||
|
|
||||||
|
if err := os.Mkdir(".proc", 0700); err != nil {
|
||||||
|
return fmt.Errorf("unable to create temporary proc mountpoint .proc: %s", err)
|
||||||
}
|
}
|
||||||
if err := system.Mount("proc", tmpProcPath, "proc", 0, ""); err != nil {
|
if err := system.Mount("proc", ".proc", "proc", 0, ""); err != nil {
|
||||||
return fmt.Errorf("unable to mount proc on temporary proc mountpoint: %s", err)
|
return fmt.Errorf("unable to mount proc on temporary proc mountpoint: %s", err)
|
||||||
}
|
}
|
||||||
if err := system.Mount("proc", tmpProcPath, "", syscall.MS_REMOUNT, ""); err != nil {
|
if err := system.Mount("proc", ".proc", "", syscall.MS_REMOUNT, ""); err != nil {
|
||||||
return fmt.Errorf("unable to remount proc read-write: %s", err)
|
return fmt.Errorf("unable to remount proc read-write: %s", err)
|
||||||
}
|
}
|
||||||
rwAttrPath := filepath.Join(rootfs, ".proc", "1", "attr")
|
|
||||||
roAttrPath := filepath.Join(rootfs, "proc", "1", "attr")
|
|
||||||
if err := system.Mount(rwAttrPath, roAttrPath, "", syscall.MS_BIND, ""); err != nil {
|
if err := system.Mount(rwAttrPath, roAttrPath, "", syscall.MS_BIND, ""); err != nil {
|
||||||
return fmt.Errorf("unable to bind-mount %s on %s: %s", rwAttrPath, roAttrPath, err)
|
return fmt.Errorf("unable to bind-mount %s on %s: %s", rwAttrPath, roAttrPath, err)
|
||||||
}
|
}
|
||||||
if err := system.Unmount(tmpProcPath, 0); err != nil {
|
if err := system.Unmount(".proc", 0); err != nil {
|
||||||
return fmt.Errorf("unable to unmount temporary proc filesystem: %s", err)
|
return fmt.Errorf("unable to unmount temporary proc filesystem: %s", err)
|
||||||
}
|
}
|
||||||
return nil
|
return os.RemoveAll(".proc")
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,6 @@ package restrict
|
||||||
|
|
||||||
import "fmt"
|
import "fmt"
|
||||||
|
|
||||||
func Restrict(rootfs, empty string) error {
|
func Restrict() error {
|
||||||
return fmt.Errorf("not supported")
|
return fmt.Errorf("not supported")
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue