diff --git a/builder/internals.go b/builder/internals.go
index aa8ec5cf72..3c3ad534c2 100644
--- a/builder/internals.go
+++ b/builder/internals.go
@@ -214,11 +214,11 @@ func (b *Builder) runContextCommand(args []string, allowRemote bool, allowDecomp
 			return err
 		} else if fi.IsDir() {
 			var subfiles []string
-			for file, sum := range sums {
-				absFile := path.Join(b.contextPath, file)
+			for _, fileInfo := range sums {
+				absFile := path.Join(b.contextPath, fileInfo.Name())
 				absOrigPath := path.Join(b.contextPath, origPath)
 				if strings.HasPrefix(absFile, absOrigPath) {
-					subfiles = append(subfiles, sum)
+					subfiles = append(subfiles, fileInfo.Sum())
 				}
 			}
 			sort.Strings(subfiles)
@@ -230,8 +230,9 @@ func (b *Builder) runContextCommand(args []string, allowRemote bool, allowDecomp
 				origPath = origPath[1:]
 			}
 			origPath = strings.TrimPrefix(origPath, "./")
-			if h, ok := sums[origPath]; ok {
-				hash = "file:" + h
+			// GetFile returns only the first entry in sums whose name matches origPath
+			if fis := sums.GetFile(origPath); fis != nil {
+				hash = "file:" + fis.Sum()
 			}
 		}
 		b.Config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) %s %s in %s", cmdName, hash, dest)}
diff --git a/pkg/tarsum/fileinfosums.go b/pkg/tarsum/fileinfosums.go
new file mode 100644
index 0000000000..f9f468098c
--- /dev/null
+++ b/pkg/tarsum/fileinfosums.go
@@ -0,0 +1,125 @@
+package tarsum
+
+import "sort"
+
+// FileInfoSumInterface exposes this info through read-only accessors so the actual name and sum cannot be meddled with.
+type FileInfoSumInterface interface {
+	// File name
+	Name() string
+	// Checksum of this particular file and its headers
+	Sum() string
+	// Position of file in the tar
+	Pos() int64
+}
+
+type fileInfoSum struct {
+	name string
+	sum  string
+	pos  int64
+}
+
+func (fis fileInfoSum) Name() string {
+	return fis.name
+}
+func (fis fileInfoSum) Sum() string {
+	return fis.sum
+}
+func (fis fileInfoSum) Pos() int64 {
+	return fis.pos
+}
+
+type FileInfoSums []FileInfoSumInterface // collection of per-file sums; sortable in place via the SortBy* methods
+
+// GetFile returns the first FileInfoSumInterface with a matching name, or nil if no entry matches.
+func (fis FileInfoSums) GetFile(name string) FileInfoSumInterface {
+	for i := range fis {
+		if fis[i].Name() == name {
+			return fis[i]
+		}
+	}
+	return nil
+}
+
+// GetAllFile returns a FileInfoSums with all entries whose name matches (empty, not nil, when there are none).
+func (fis FileInfoSums) GetAllFile(name string) FileInfoSums {
+	f := FileInfoSums{}
+	for i := range fis {
+		if fis[i].Name() == name {
+			f = append(f, fis[i])
+		}
+	}
+	return f
+}
+
+func contains(s []string, e string) bool { // reports whether e is an element of s; not called anywhere in this patch
+	for _, a := range s {
+		if a == e {
+			return true
+		}
+	}
+	return false
+}
+
+func (fis FileInfoSums) GetDuplicatePaths() (dups FileInfoSums) { // returns entries whose name already appeared earlier in fis; nil if none
+	seen := make(map[string]int, len(fis)) // allocate early; no need to grow this map.
+	for i := range fis {
+		f := fis[i]
+		if _, ok := seen[f.Name()]; ok {
+			dups = append(dups, f)
+		} else {
+			seen[f.Name()] = 0 // the value is never read; the map is used as a set of names
+		}
+	}
+	return dups
+}
+
+func (fis FileInfoSums) Len() int      { return len(fis) }
+func (fis FileInfoSums) Swap(i, j int) { fis[i], fis[j] = fis[j], fis[i] }
+
+func (fis FileInfoSums) SortByPos() { // sorts fis in place by position in the tar
+	sort.Sort(byPos{fis})
+}
+
+func (fis FileInfoSums) SortByNames() { // sorts fis in place by name, then by position for equal names
+	sort.Sort(byName{fis})
+}
+
+func (fis FileInfoSums) SortBySums() { // sorts fis in place by sum; see bySum.Less for the duplicate-name rule
+	dups := fis.GetDuplicatePaths()
+	if len(dups) > 0 {
+		sort.Sort(bySum{fis, dups})
+	} else {
+		sort.Sort(bySum{fis, nil}) // no duplicate names: a nil dups makes Less compare by sum only
+	}
+}
+
+// byName is a sort.Sort helper for sorting by file names.
+// If names are the same, order them by their appearance in the tar archive
+type byName struct{ FileInfoSums }
+
+func (bn byName) Less(i, j int) bool {
+	if bn.FileInfoSums[i].Name() == bn.FileInfoSums[j].Name() {
+		return bn.FileInfoSums[i].Pos() < bn.FileInfoSums[j].Pos()
+	}
+	return bn.FileInfoSums[i].Name() < bn.FileInfoSums[j].Name()
+}
+
+// bySum is a sort.Sort helper for sorting by the sums of all the fileinfos in the tar archive; when dups is non-nil, entries sharing a name are ordered by position instead
+type bySum struct {
+	FileInfoSums
+	dups FileInfoSums
+}
+
+func (bs bySum) Less(i, j int) bool {
+	if bs.dups != nil && bs.FileInfoSums[i].Name() == bs.FileInfoSums[j].Name() { // NOTE(review): mixing position and sum comparisons may not give a consistent ordering - confirm
+		return bs.FileInfoSums[i].Pos() < bs.FileInfoSums[j].Pos()
+	}
+	return bs.FileInfoSums[i].Sum() < bs.FileInfoSums[j].Sum()
+}
+
+// byPos is a sort.Sort helper for sorting all the fileinfos by their original order (position in the tar archive)
+type byPos struct{ FileInfoSums }
+
+func (bp byPos) Less(i, j int) bool {
+	return bp.FileInfoSums[i].Pos() < bp.FileInfoSums[j].Pos()
+}
diff --git a/pkg/tarsum/fileinfosums_test.go b/pkg/tarsum/fileinfosums_test.go
new file mode 100644
index 0000000000..e1c6cc1238
--- /dev/null
+++ b/pkg/tarsum/fileinfosums_test.go
@@ -0,0 +1,45 @@
+package tarsum
+
+import "testing"
+
+func newFileInfoSums() FileInfoSums {
+	return FileInfoSums{
+		fileInfoSum{name: "file3", sum: "2abcdef1234567890", pos: 2},
+		fileInfoSum{name: "dup1", sum: "deadbeef1", pos: 5},
+		fileInfoSum{name: "file1", sum: "0abcdef1234567890", pos: 0},
+		fileInfoSum{name: "file4", sum: "3abcdef1234567890", pos: 3},
+		fileInfoSum{name: "dup1", sum: "deadbeef0", pos: 4},
+		fileInfoSum{name: "file2", sum: "1abcdef1234567890", pos: 1},
+	}
+}
+
+func TestSortFileInfoSums(t *testing.T) {
+	dups := newFileInfoSums().GetAllFile("dup1")
+	if len(dups) != 2 {
+		t.Errorf("expected length 2, got %d", len(dups))
+	}
+	dups.SortByNames()
+	if dups[0].Pos() != 4 {
+		t.Errorf("sorted dups should be ordered by position. Expected 4, got %d", dups[0].Pos())
+	}
+
+	fis := newFileInfoSums()
+	expected := "0abcdef1234567890"
+	fis.SortBySums()
+	got := fis[0].Sum()
+	if got != expected {
+		t.Errorf("Expected %q, got %q", expected, got)
+	}
+
+	fis = newFileInfoSums()
+	expected = "dup1"
+	fis.SortByNames()
+	gotFis := fis[0]
+	if gotFis.Name() != expected {
+		t.Errorf("Expected %q, got %q", expected, gotFis.Name())
+	}
+	// since a duplicate is first, ensure it is ordered first by position too
+	if gotFis.Pos() != 4 {
+		t.Errorf("Expected %d, got %d", 4, gotFis.Pos())
+	}
+}
diff --git a/pkg/tarsum/tarsum.go b/pkg/tarsum/tarsum.go
index 69775fa8af..4ae71f0704 100644
--- a/pkg/tarsum/tarsum.go
+++ b/pkg/tarsum/tarsum.go
@@ -39,7 +39,7 @@ func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
 // checksums of a tar archive
 type TarSum interface {
 	io.Reader
-	GetSums() map[string]string
+	GetSums() FileInfoSums
 	Sum([]byte) string
 	Version() Version
 }
@@ -54,7 +54,8 @@ type tarSum struct {
 	bufGz              *bytes.Buffer
 	bufData            []byte
 	h                  hash.Hash
-	sums               map[string]string
+	sums               FileInfoSums
+	fileCounter        int64
 	currentFile        string
 	finished           bool
 	first              bool
@@ -126,7 +127,7 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
 		ts.h = sha256.New()
 		ts.h.Reset()
 		ts.first = true
-		ts.sums = make(map[string]string)
+		ts.sums = FileInfoSums{}
 	}
 
 	if ts.finished {
@@ -153,7 +154,8 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
 				return 0, err
 			}
 			if !ts.first {
-				ts.sums[ts.currentFile] = hex.EncodeToString(ts.h.Sum(nil))
+				ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
+				ts.fileCounter++
 				ts.h.Reset()
 			} else {
 				ts.first = false
@@ -218,25 +220,20 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
 }
 
 func (ts *tarSum) Sum(extra []byte) string {
-	var sums []string
-
-	for _, sum := range ts.sums {
-		sums = append(sums, sum)
-	}
-	sort.Strings(sums)
+	ts.sums.SortBySums() // sorts ts.sums in place, so the order seen through GetSums changes once Sum has been called
 	h := sha256.New()
 	if extra != nil {
 		h.Write(extra)
 	}
-	for _, sum := range sums {
-		log.Debugf("-->%s<--", sum)
-		h.Write([]byte(sum))
+	for _, fis := range ts.sums {
+		log.Debugf("-->%s<--", fis.Sum())
+		h.Write([]byte(fis.Sum()))
 	}
 	checksum := ts.Version().String() + "+sha256:" + hex.EncodeToString(h.Sum(nil))
 	log.Debugf("checksum processed: %s", checksum)
 	return checksum
 }
 
-func (ts *tarSum) GetSums() map[string]string {
+func (ts *tarSum) GetSums() FileInfoSums {
 	return ts.sums
 }
diff --git a/pkg/tarsum/tarsum_test.go b/pkg/tarsum/tarsum_test.go
index 6616cba086..d0b4c94757 100644
--- a/pkg/tarsum/tarsum_test.go
+++ b/pkg/tarsum/tarsum_test.go
@@ -59,6 +59,22 @@ var testLayers = []testLayer{
 	{
 		options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
 		tarsum:  "tarsum+sha256:8bf12d7e67c51ee2e8306cba569398b1b9f419969521a12ffb9d8875e8836738"},
+	{
+		// this tar has two files with the same path
+		filename: "testdata/collision/collision-0.tar",
+		tarsum:   "tarsum+sha256:08653904a68d3ab5c59e65ef58c49c1581caa3c34744f8d354b3f575ea04424a"},
+	{
+		// this tar has the same two files (with the same path), but in reversed order, ensuring it has a different hash than above
+		filename: "testdata/collision/collision-1.tar",
+		tarsum:   "tarsum+sha256:b51c13fbefe158b5ce420d2b930eef54c5cd55c50a2ee4abdddea8fa9f081e0d"},
+	{
+		// this tar has newer of collider-0.tar (presumably collision-0.tar), ensuring it has a different hash
+		filename: "testdata/collision/collision-2.tar",
+		tarsum:   "tarsum+sha256:381547080919bb82691e995508ae20ed33ce0f6948d41cafbeb70ce20c73ee8e"},
+	{
+		// this tar has newer of collider-1.tar (presumably collision-1.tar), ensuring it has a different hash
+		filename: "testdata/collision/collision-3.tar",
+		tarsum:   "tarsum+sha256:f886e431c08143164a676805205979cd8fa535dfcef714db5515650eea5a7c0f"},
 }
 
 type sizedOptions struct {
diff --git a/pkg/tarsum/testdata/collision/collision-0.tar b/pkg/tarsum/testdata/collision/collision-0.tar
new file mode 100644
index 0000000000..1c636b3bc7
Binary files /dev/null and b/pkg/tarsum/testdata/collision/collision-0.tar differ
diff --git a/pkg/tarsum/testdata/collision/collision-1.tar b/pkg/tarsum/testdata/collision/collision-1.tar
new file mode 100644
index 0000000000..b411be9785
Binary files /dev/null and b/pkg/tarsum/testdata/collision/collision-1.tar differ
diff --git a/pkg/tarsum/testdata/collision/collision-2.tar b/pkg/tarsum/testdata/collision/collision-2.tar
new file mode 100644
index 0000000000..7b5c04a964
Binary files /dev/null and b/pkg/tarsum/testdata/collision/collision-2.tar differ
diff --git a/pkg/tarsum/testdata/collision/collision-3.tar b/pkg/tarsum/testdata/collision/collision-3.tar
new file mode 100644
index 0000000000..f8c64586d2
Binary files /dev/null and b/pkg/tarsum/testdata/collision/collision-3.tar differ