mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
devmapper: Provide a knob dm.xfs_nospace_max_retries
When an xfs filesystem is used on top of a thin pool, xfs can get ENOSPC errors from the thin pool when the thin pool is full. As of now xfs retries the IO, keeps on retrying, and does not give up. This can result in the container application being stuck for a very long time. In fact I have seen instances of unkillable processes. That means once the thin pool is full and a process gets stuck, the container can't be stopped/killed either, and the only option left seems to be to power cycle the box. In another instance, the writer did not block but failed after a while. But when I tried to exit/stop the container, unmounting xfs hung and the only thing I could do was power cycle the machine. The upstream kernel has now committed patches that allow user space to customize XFS behavior in case of errors. One of the knobs is max_retries, which specifies how many times an IO should be retried when ENOSPC is encountered. This patch provides a tunable knob (dm.xfs_nospace_max_retries) so that the user can specify a value for max_retries and tune xfs behavior. If one sets this value to 0, xfs will not retry IO when an ENOSPC error is encountered. It will instead give up and shut down the filesystem. This knob can be useful if one is running into the unkillable processes/containers issue on top of xfs. Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
This commit is contained in:
parent
ba23bf202a
commit
4f0017b9ad
2 changed files with 61 additions and 0 deletions
|
@ -122,6 +122,7 @@ type DeviceSet struct {
|
|||
uidMaps []idtools.IDMap
|
||||
gidMaps []idtools.IDMap
|
||||
minFreeSpacePercent uint32 //min free space percentage in thinpool
|
||||
xfsNospaceRetries string // max retries when xfs receives ENOSPC
|
||||
}
|
||||
|
||||
// DiskUsage contains information about disk usage and is used when reporting Status of a device.
|
||||
|
@ -2308,6 +2309,38 @@ func (devices *DeviceSet) Shutdown(home string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Recent XFS changes allow changing behavior of filesystem in case of errors.
|
||||
// When thin pool gets full and XFS gets ENOSPC error, currently it tries
|
||||
// IO infinitely and sometimes it can block the container process
|
||||
// and the process can't be killed. With a value of 0, XFS will not retry upon error
|
||||
// and will instead shut down the filesystem.
|
||||
|
||||
func (devices *DeviceSet) xfsSetNospaceRetries(info *devInfo) error {
|
||||
dmDevicePath, err := os.Readlink(info.DevName())
|
||||
if err != nil {
|
||||
return fmt.Errorf("devmapper: readlink failed for device %v:%v", info.DevName(), err)
|
||||
}
|
||||
|
||||
dmDeviceName := path.Base(dmDevicePath)
|
||||
filePath := "/sys/fs/xfs/" + dmDeviceName + "/error/metadata/ENOSPC/max_retries"
|
||||
maxRetriesFile, err := os.OpenFile(filePath, os.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
// Older kernels don't have this feature/file
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("devmapper: Failed to open file %v:%v", filePath, err)
|
||||
}
|
||||
defer maxRetriesFile.Close()
|
||||
|
||||
// Set max retries to 0
|
||||
_, err = maxRetriesFile.WriteString(devices.xfsNospaceRetries)
|
||||
if err != nil {
|
||||
return fmt.Errorf("devmapper: Failed to write string %v to file %v:%v", devices.xfsNospaceRetries, filePath, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MountDevice mounts the device if not already mounted.
|
||||
func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error {
|
||||
info, err := devices.lookupDeviceWithLock(hash)
|
||||
|
@ -2348,6 +2381,12 @@ func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error {
|
|||
return fmt.Errorf("devmapper: Error mounting '%s' on '%s': %s", info.DevName(), path, err)
|
||||
}
|
||||
|
||||
if fstype == "xfs" && devices.xfsNospaceRetries != "" {
|
||||
if err := devices.xfsSetNospaceRetries(info); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -2668,6 +2707,12 @@ func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps [
|
|||
}
|
||||
|
||||
devices.minFreeSpacePercent = uint32(minFreeSpacePercent)
|
||||
case "dm.xfs_nospace_max_retries":
|
||||
_, err := strconv.ParseUint(val, 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
devices.xfsNospaceRetries = val
|
||||
default:
|
||||
return nil, fmt.Errorf("devmapper: Unknown option %s\n", key)
|
||||
}
|
||||
|
|
|
@ -552,6 +552,22 @@ options for `zfs` start with `zfs` and options for `btrfs` start with `btrfs`.
|
|||
$ dockerd --storage-opt dm.min_free_space=10%
|
||||
```
|
||||
|
||||
* `dm.xfs_nospace_max_retries`
|
||||
|
||||
Specifies the maximum number of retries XFS should attempt to complete
|
||||
IO when ENOSPC (no space) error is returned by underlying storage device.
|
||||
|
||||
By default XFS retries infinitely for IO to finish and this can result
|
||||
in an unkillable process. To change this behavior one can set
|
||||
`dm.xfs_nospace_max_retries` to, say, 0 and XFS will not retry IO after getting
|
||||
ENOSPC and will shut down the filesystem.
|
||||
|
||||
Example use:
|
||||
|
||||
```bash
|
||||
$ dockerd --storage-opt dm.xfs_nospace_max_retries=0
|
||||
```
|
||||
|
||||
#### ZFS options
|
||||
|
||||
* `zfs.fsname`
|
||||
|
|
Loading…
Reference in a new issue