From 2ffef1b7eb618162673c6ffabccb9ca57c7dfce3 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Tue, 1 Apr 2014 11:28:21 +0200 Subject: [PATCH] devmapper: Avoid AB-BA deadlock We currently drop the global lock while holding a per-device lock when waiting for device removal, and then we re-aquire it when the sleep is done. This is causing a AB-BA deadlock if anyone at the same time tries to do any operation on that device like this: thread A: thread B grabs global lock grabs device lock releases global lock sleeps grabs global lock blocks on device lock wakes up blocks on global lock To trigger this you can for instance do: ID=`docker run -d fedora sleep 5` cd /var/lib/docker/devicemapper/mnt/$ID docker wait $ID docker rm $ID & docker rm $ID The unmount will fail due to the mount being busy thus causing the timeout and the second rm will then trigger the deadlock. We fix this by adding a lock ordering such that the device locks are always grabbed before the global lock. This is safe since the device lookups now have a separate lock. Docker-DCO-1.1-Signed-off-by: Alexander Larsson (github: alexlarsson) --- runtime/graphdriver/devmapper/deviceset.go | 58 +++++++++++++--------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/runtime/graphdriver/devmapper/deviceset.go b/runtime/graphdriver/devmapper/deviceset.go index f972c34bb1..97d670a3d9 100644 --- a/runtime/graphdriver/devmapper/deviceset.go +++ b/runtime/graphdriver/devmapper/deviceset.go @@ -47,6 +47,11 @@ type DevInfo struct { // sometimes release that lock while sleeping. In that case // this per-device lock is still held, protecting against // other accesses to the device that we're doing the wait on. + // + // WARNING: In order to avoid AB-BA deadlocks when releasing + // the global lock while holding the per-device locks all + // device locks must be aquired *before* the device lock, and + // multiple device locks should be aquired parent before child. lock sync.Mutex `json:"-"` } @@ -580,13 +585,6 @@ func (devices *DeviceSet) initDevmapper(doInit bool) error { } func (devices *DeviceSet) AddDevice(hash, baseHash string) error { - devices.Lock() - defer devices.Unlock() - - if info, _ := devices.lookupDevice(hash); info != nil { - return fmt.Errorf("device %s already exists", hash) - } - baseInfo, err := devices.lookupDevice(baseHash) if err != nil { return err @@ -595,6 +593,13 @@ func (devices *DeviceSet) AddDevice(hash, baseHash string) error { baseInfo.lock.Lock() defer baseInfo.lock.Unlock() + devices.Lock() + defer devices.Unlock() + + if info, _ := devices.lookupDevice(hash); info != nil { + return fmt.Errorf("device %s already exists", hash) + } + deviceId := devices.allocateDeviceId() if err := devices.createSnapDevice(devices.getPoolDevName(), deviceId, baseInfo.Name(), baseInfo.DeviceId); err != nil { @@ -658,9 +663,6 @@ func (devices *DeviceSet) deleteDevice(info *DevInfo) error { } func (devices *DeviceSet) DeleteDevice(hash string) error { - devices.Lock() - defer devices.Unlock() - info, err := devices.lookupDevice(hash) if err != nil { return err @@ -669,6 +671,9 @@ func (devices *DeviceSet) DeleteDevice(hash string) error { info.lock.Lock() defer info.lock.Unlock() + devices.Lock() + defer devices.Unlock() + return devices.deleteDevice(info) } @@ -802,8 +807,6 @@ func (devices *DeviceSet) waitClose(info *DevInfo) error { } func (devices *DeviceSet) Shutdown() error { - devices.Lock() - defer devices.Unlock() utils.Debugf("[deviceset %s] shutdown()", devices.devicePrefix) utils.Debugf("[devmapper] Shutting down DeviceSet: %s", devices.root) @@ -827,31 +830,36 @@ func (devices *DeviceSet) Shutdown() error { utils.Debugf("Shutdown unmounting %s, error: %s\n", info.mountPath, err) } + devices.Lock() if err := devices.deactivateDevice(info); err != nil { utils.Debugf("Shutdown deactivate %s , error: %s\n", info.Hash, err) } + devices.Unlock() } info.lock.Unlock() } info, _ := devices.lookupDevice("") if info != nil { + info.lock.Lock() + devices.Lock() if err := devices.deactivateDevice(info); err != nil { utils.Debugf("Shutdown deactivate base , error: %s\n", err) } + devices.Unlock() + info.lock.Unlock() } + devices.Lock() if err := devices.deactivatePool(); err != nil { utils.Debugf("Shutdown deactivate pool , error: %s\n", err) } + devices.Unlock() return nil } func (devices *DeviceSet) MountDevice(hash, path string, mountLabel string) error { - devices.Lock() - defer devices.Unlock() - info, err := devices.lookupDevice(hash) if err != nil { return err @@ -860,6 +868,9 @@ func (devices *DeviceSet) MountDevice(hash, path string, mountLabel string) erro info.lock.Lock() defer info.lock.Unlock() + devices.Lock() + defer devices.Unlock() + if info.mountCount > 0 { if path != info.mountPath { return fmt.Errorf("Trying to mount devmapper device in multple places (%s, %s)", info.mountPath, path) @@ -900,8 +911,6 @@ func (devices *DeviceSet) MountDevice(hash, path string, mountLabel string) erro func (devices *DeviceSet) UnmountDevice(hash string, mode UnmountMode) error { utils.Debugf("[devmapper] UnmountDevice(hash=%s, mode=%d)", hash, mode) defer utils.Debugf("[devmapper] UnmountDevice END") - devices.Lock() - defer devices.Unlock() info, err := devices.lookupDevice(hash) if err != nil { @@ -911,6 +920,9 @@ func (devices *DeviceSet) UnmountDevice(hash string, mode UnmountMode) error { info.lock.Lock() defer info.lock.Unlock() + devices.Lock() + defer devices.Unlock() + if mode == UnmountFloat { if info.floating { return fmt.Errorf("UnmountDevice: can't float floating reference %s\n", hash) @@ -971,9 +983,6 @@ func (devices *DeviceSet) HasInitializedDevice(hash string) bool { } func (devices *DeviceSet) HasActivatedDevice(hash string) bool { - devices.Lock() - defer devices.Unlock() - info, _ := devices.lookupDevice(hash) if info == nil { return false @@ -982,6 +991,9 @@ func (devices *DeviceSet) HasActivatedDevice(hash string) bool { info.lock.Lock() defer info.lock.Unlock() + devices.Lock() + defer devices.Unlock() + devinfo, _ := getInfo(info.Name()) return devinfo != nil && devinfo.Exists != 0 } @@ -1026,9 +1038,6 @@ func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSec } func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) { - devices.Lock() - defer devices.Unlock() - info, err := devices.lookupDevice(hash) if err != nil { return nil, err @@ -1037,6 +1046,9 @@ func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) { info.lock.Lock() defer info.lock.Unlock() + devices.Lock() + defer devices.Unlock() + status := &DevStatus{ DeviceId: info.DeviceId, Size: info.Size,