From f9f80443638fc2d703ee6205c8ef3db8e38db9a3 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 15 Sep 2014 14:45:53 -0400 Subject: [PATCH] archive: preserve hardlinks in Tar and Untar * integration test for preserving hardlinks Signed-off-by: Vincent Batts Signed-off-by: Vincent Batts --- integration-cli/docker_cli_commit_test.go | 52 ++++++++++++++++++++ pkg/archive/archive.go | 52 +++++++++++++++----- pkg/archive/archive_test.go | 58 +++++++++++++++++++++++ pkg/archive/changes.go | 18 ++++--- 4 files changed, 160 insertions(+), 20 deletions(-) diff --git a/integration-cli/docker_cli_commit_test.go b/integration-cli/docker_cli_commit_test.go index ddc7a2e041..7715e81bf5 100644 --- a/integration-cli/docker_cli_commit_test.go +++ b/integration-cli/docker_cli_commit_test.go @@ -101,6 +101,58 @@ func TestCommitNewFile(t *testing.T) { logDone("commit - commit file and read") } +func TestCommitHardlink(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "-t", "--name", "hardlinks", "busybox", "sh", "-c", "touch file1 && ln file1 file2 && ls -di file1 file2") + firstOuput, _, err := runCommandWithOutput(cmd) + if err != nil { + t.Fatal(err) + } + + chunks := strings.Split(strings.TrimSpace(firstOuput), " ") + inode := chunks[0] + found := false + for _, chunk := range chunks[1:] { + if chunk == inode { + found = true + break + } + } + if !found { + t.Fatalf("Failed to create hardlink in a container. Expected to find %q in %q", inode, chunks[1:]) + } + + cmd = exec.Command(dockerBinary, "commit", "hardlinks", "hardlinks") + imageID, _, err := runCommandWithOutput(cmd) + if err != nil { + t.Fatal(imageID, err) + } + imageID = strings.Trim(imageID, "\r\n") + + cmd = exec.Command(dockerBinary, "run", "-t", "hardlinks", "ls", "-di", "file1", "file2") + secondOuput, _, err := runCommandWithOutput(cmd) + if err != nil { + t.Fatal(err) + } + + chunks = strings.Split(strings.TrimSpace(secondOuput), " ") + inode = chunks[0] + found = false + for _, chunk := range chunks[1:] { + if chunk == inode { + found = true + break + } + } + if !found { + t.Fatalf("Failed to create hardlink in a container. Expected to find %q in %q", inode, chunks[1:]) + } + + deleteAllContainers() + deleteImages(imageID) + + logDone("commit - commit hardlinks") +} + func TestCommitTTY(t *testing.T) { cmd := exec.Command(dockerBinary, "run", "-t", "--name", "tty", "busybox", "/bin/ls") if _, err := runCommand(cmd); err != nil { diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index 9c4d881cfa..dd14b778f2 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -153,7 +153,15 @@ func (compression *Compression) Extension() string { return "" } -func addTarFile(path, name string, tw *tar.Writer, twBuf *bufio.Writer) error { +type tarAppender struct { + TarWriter *tar.Writer + Buffer *bufio.Writer + + // for hardlink mapping + SeenFiles map[uint64]string +} + +func (ta *tarAppender) addTarFile(path, name string) error { fi, err := os.Lstat(path) if err != nil { return err @@ -188,13 +196,28 @@ func addTarFile(path, name string, tw *tar.Writer, twBuf *bufio.Writer) error { } + // if it's a regular file and has more than 1 link, + // it's hardlinked, so set the type flag accordingly + if fi.Mode().IsRegular() && stat.Nlink > 1 { + // a link should have a name that it links too + // and that linked name should be first in the tar archive + ino := uint64(stat.Ino) + if oldpath, ok := ta.SeenFiles[ino]; ok { + hdr.Typeflag = tar.TypeLink + hdr.Linkname = oldpath + hdr.Size = 0 // This Must be here for the writer math to add up! + } else { + ta.SeenFiles[ino] = name + } + } + capability, _ := system.Lgetxattr(path, "security.capability") if capability != nil { hdr.Xattrs = make(map[string]string) hdr.Xattrs["security.capability"] = string(capability) } - if err := tw.WriteHeader(hdr); err != nil { + if err := ta.TarWriter.WriteHeader(hdr); err != nil { return err } @@ -204,17 +227,17 @@ func addTarFile(path, name string, tw *tar.Writer, twBuf *bufio.Writer) error { return err } - twBuf.Reset(tw) - _, err = io.Copy(twBuf, file) + ta.Buffer.Reset(ta.TarWriter) + _, err = io.Copy(ta.Buffer, file) file.Close() if err != nil { return err } - err = twBuf.Flush() + err = ta.Buffer.Flush() if err != nil { return err } - twBuf.Reset(nil) + ta.Buffer.Reset(nil) } return nil @@ -345,9 +368,15 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) return nil, err } - tw := tar.NewWriter(compressWriter) - go func() { + ta := &tarAppender{ + TarWriter: tar.NewWriter(compressWriter), + Buffer: pools.BufioWriter32KPool.Get(nil), + SeenFiles: make(map[uint64]string), + } + // this buffer is needed for the duration of this piped stream + defer pools.BufioWriter32KPool.Put(ta.Buffer) + // In general we log errors here but ignore them because // during e.g. a diff operation the container can continue // mutating the filesystem and we can see transient errors @@ -357,9 +386,6 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) options.Includes = []string{"."} } - twBuf := pools.BufioWriter32KPool.Get(nil) - defer pools.BufioWriter32KPool.Put(twBuf) - var renamedRelFilePath string // For when tar.Options.Name is set for _, include := range options.Includes { filepath.Walk(filepath.Join(srcPath, include), func(filePath string, f os.FileInfo, err error) error { @@ -395,7 +421,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) relFilePath = strings.Replace(relFilePath, renamedRelFilePath, options.Name, 1) } - if err := addTarFile(filePath, relFilePath, tw, twBuf); err != nil { + if err := ta.addTarFile(filePath, relFilePath); err != nil { log.Debugf("Can't add file %s to tar: %s", srcPath, err) } return nil @@ -403,7 +429,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) } // Make sure to check the error on Close. - if err := tw.Close(); err != nil { + if err := ta.TarWriter.Close(); err != nil { log.Debugf("Can't close tar writer: %s", err) } if err := compressWriter.Close(); err != nil { diff --git a/pkg/archive/archive_test.go b/pkg/archive/archive_test.go index 900fff5f01..3516aca8f0 100644 --- a/pkg/archive/archive_test.go +++ b/pkg/archive/archive_test.go @@ -249,6 +249,64 @@ func TestUntarUstarGnuConflict(t *testing.T) { } } +func TestTarWithHardLink(t *testing.T) { + origin, err := ioutil.TempDir("", "docker-test-tar-hardlink") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(origin) + if err := ioutil.WriteFile(path.Join(origin, "1"), []byte("hello world"), 0700); err != nil { + t.Fatal(err) + } + if err := os.Link(path.Join(origin, "1"), path.Join(origin, "2")); err != nil { + t.Fatal(err) + } + + var i1, i2 uint64 + if i1, err = getNlink(path.Join(origin, "1")); err != nil { + t.Fatal(err) + } + // sanity check that we can hardlink + if i1 != 2 { + t.Skipf("skipping since hardlinks don't work here; expected 2 links, got %d", i1) + } + + dest, err := ioutil.TempDir("", "docker-test-tar-hardlink-dest") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dest) + + // we'll do this in two steps to separate failure + fh, err := Tar(origin, Uncompressed) + if err != nil { + t.Fatal(err) + } + + // ensure we can read the whole thing with no error, before writing back out + buf, err := ioutil.ReadAll(fh) + if err != nil { + t.Fatal(err) + } + + bRdr := bytes.NewReader(buf) + err = Untar(bRdr, dest, &TarOptions{Compression: Uncompressed}) + if err != nil { + t.Fatal(err) + } + + if i1, err = getInode(path.Join(dest, "1")); err != nil { + t.Fatal(err) + } + if i2, err = getInode(path.Join(dest, "2")); err != nil { + t.Fatal(err) + } + + if i1 != i2 { + t.Errorf("expected matching inodes, but got %d and %d", i1, i2) + } +} + func getNlink(path string) (uint64, error) { stat, err := os.Stat(path) if err != nil { diff --git a/pkg/archive/changes.go b/pkg/archive/changes.go index 557b5db583..3e9ab45267 100644 --- a/pkg/archive/changes.go +++ b/pkg/archive/changes.go @@ -368,11 +368,15 @@ func minor(device uint64) uint64 { // ExportChanges produces an Archive from the provided changes, relative to dir. func ExportChanges(dir string, changes []Change) (Archive, error) { reader, writer := io.Pipe() - tw := tar.NewWriter(writer) - go func() { - twBuf := pools.BufioWriter32KPool.Get(nil) - defer pools.BufioWriter32KPool.Put(twBuf) + ta := &tarAppender{ + TarWriter: tar.NewWriter(writer), + Buffer: pools.BufioWriter32KPool.Get(nil), + SeenFiles: make(map[uint64]string), + } + // this buffer is needed for the duration of this piped stream + defer pools.BufioWriter32KPool.Put(ta.Buffer) + // In general we log errors here but ignore them because // during e.g. a diff operation the container can continue // mutating the filesystem and we can see transient errors @@ -390,19 +394,19 @@ func ExportChanges(dir string, changes []Change) (Archive, error) { AccessTime: timestamp, ChangeTime: timestamp, } - if err := tw.WriteHeader(hdr); err != nil { + if err := ta.TarWriter.WriteHeader(hdr); err != nil { log.Debugf("Can't write whiteout header: %s", err) } } else { path := filepath.Join(dir, change.Path) - if err := addTarFile(path, change.Path[1:], tw, twBuf); err != nil { + if err := ta.addTarFile(path, change.Path[1:]); err != nil { log.Debugf("Can't add file %s to tar: %s", path, err) } } } // Make sure to check the error on Close. - if err := tw.Close(); err != nil { + if err := ta.TarWriter.Close(); err != nil { log.Debugf("Can't close layer: %s", err) } writer.Close()