From a81278befee73b0c6a5ceb26dafe081577ce6e5c Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Fri, 10 May 2019 17:19:49 -0700
Subject: [PATCH] aufs: retry auplink flush

Running a bundled aufs benchmark sometimes results in this warning:

> WARN[0001] Couldn't run auplink before unmount /tmp/aufs-tests/aufs/mnt/XXXXX  error="exit status 22" storage-driver=aufs

If we take a look at what aulink utility produces on stderr, we'll see:

> auplink:proc_mnt.c:96: /tmp/aufs-tests/aufs/mnt/XXXXX: Invalid argument

and auplink exits with exit code of 22 (EINVAL).

Looking into auplink source code, what happens is it tries to find a
record in /proc/self/mounts corresponding to the mount point (by using
setmntent()/getmntent_r() glibc functions), and it fails.

Some manual testing, as well as runtime testing with lots of printf
added on mount/unmount, as well as calls to check the superblock fs
magic on mount point (as in graphdriver.Mounted(graphdriver.FsMagicAufs, target)
confirmed that this record is in fact there, but sometimes auplink
can't find it. I was also able to reproduce the same error (inability
to find a mount in /proc/self/mounts that should definitely be there)
using a small C program, mocking what `auplink` does:

```c
 #include <stdio.h>
 #include <err.h>
 #include <mntent.h>
 #include <string.h>
 #include <stdlib.h>

int main(int argc, char **argv)
{
	FILE *fp;
	struct mntent m, *p;
	char a[4096];
	char buf[4096 + 1024];
	int found =0, lines = 0;

	if (argc != 2) {
		fprintf(stderr, "Usage: %s <mountpoint>\n", argv[0]);
		exit(1);
	}

	fp = setmntent("/proc/self/mounts", "r");
	if (!fp) {
		err(1, "setmntent");
	}
	setvbuf(fp, a, _IOLBF, sizeof(a));
	while ((p = getmntent_r(fp, &m, buf, sizeof(buf)))) {
		lines++;
		if (!strcmp(p->mnt_dir, argv[1])) {
			found++;
		}
	}
	printf("found %d entries for %s (%d lines seen)\n", found, argv[1], lines);
	return !found;
}
```

I have also wrote a few other C proggies -- one that reads
/proc/self/mounts directly, one that reads /proc/self/mountinfo instead.
They are also prone to the same occasional error.

It is not perfectly clear why this happens, but so far my best theory
is when a lot of mounts/unmounts happen in parallel with reading
contents of /proc/self/mounts, sometimes the kernel fails to provide
continuity (i.e. it skips some part of file or mixes it up in some
other way). In other words, this is a kernel bug (which is probably
hard to fix unless some other interface to get a mount entry is added).

Now, there is no real fix, and a workaround I was able to come up
with is to retry when we got EINVAL. It usually works on the second
attempt, although I've once seen it took two attempts to go through.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
(cherry picked from commit ae431b10a9508e2bf3b1782e9d435855e3e13977)
---
 daemon/graphdriver/aufs/mount.go | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/daemon/graphdriver/aufs/mount.go b/daemon/graphdriver/aufs/mount.go
index 4e85cf235d..fc20a5eca6 100644
--- a/daemon/graphdriver/aufs/mount.go
+++ b/daemon/graphdriver/aufs/mount.go
@@ -4,14 +4,38 @@ package aufs // import "github.com/docker/docker/daemon/graphdriver/aufs"
 
 import (
 	"os/exec"
+	"syscall"
 
 	"github.com/docker/docker/pkg/mount"
 )
 
 // Unmount the target specified.
 func Unmount(target string) error {
-	if err := exec.Command("auplink", target, "flush").Run(); err != nil {
-		logger.WithError(err).Warnf("Couldn't run auplink before unmount %s", target)
+	const (
+		EINVAL  = 22 // if auplink returns this,
+		retries = 3  // retry a few times
+	)
+
+	for i := 0; ; i++ {
+		out, err := exec.Command("auplink", target, "flush").CombinedOutput()
+		if err == nil {
+			break
+		}
+		rc := 0
+		if exiterr, ok := err.(*exec.ExitError); ok {
+			if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
+				rc = status.ExitStatus()
+			}
+		}
+		if i >= retries || rc != EINVAL {
+			logger.WithError(err).WithField("method", "Unmount").Warnf("auplink flush failed: %s", out)
+			break
+		}
+		// auplink failed to find target in /proc/self/mounts because
+		// kernel can't guarantee continuity while reading from it
+		// while mounts table is being changed
+		logger.Debugf("auplink flush error (retrying %d/%d): %s", i+1, retries, out)
 	}
+
 	return mount.Unmount(target)
 }