Finalize TarSum Version 1 w/ refactor

The current Dev version of TarSum includes hashing of extended
file attributes and omits inclusion of modified time headers.

I refactored the logic around the version differences to make it
more clear that the difference between versions is in how tar
headers are selected and ordered.

TarSum Version 1 is now declared with the new Dev version continuing
to track it.

Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn)
This commit is contained in:
Josh Hawn 2014-10-30 13:47:31 -07:00
parent b63a254522
commit a7aa2c8ad2
3 changed files with 100 additions and 54 deletions

View File

@ -7,8 +7,6 @@ import (
"encoding/hex"
"hash"
"io"
"sort"
"strconv"
"strings"
"github.com/docker/docker/vendor/src/code.google.com/p/go/src/pkg/archive/tar"
@ -29,18 +27,20 @@ const (
// including the byte payload of the image's json metadata as well, and for
// calculating the checksums for buildcache.
func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
if _, ok := tarSumVersions[v]; !ok {
return nil, ErrVersionNotImplemented
headerSelector, err := getTarHeaderSelector(v)
if err != nil {
return nil, err
}
return &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v}, nil
return &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector}, nil
}
// Create a new TarSum, providing a THash to use rather than the DefaultTHash
func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
if _, ok := tarSumVersions[v]; !ok {
return nil, ErrVersionNotImplemented
headerSelector, err := getTarHeaderSelector(v)
if err != nil {
return nil, err
}
return &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, tHash: tHash}, nil
return &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}, nil
}
// TarSum is the generic interface for calculating fixed time
@ -69,8 +69,9 @@ type tarSum struct {
currentFile string
finished bool
first bool
DisableCompression bool // false by default. When false, the output gzip compressed.
tarSumVersion Version // this field is not exported so it can not be mutated during use
DisableCompression bool // false by default. When false, the output gzip compressed.
tarSumVersion Version // this field is not exported so it can not be mutated during use
headerSelector tarHeaderSelector // handles selecting and ordering headers for files in the archive
}
func (ts tarSum) Hash() THash {
@ -103,49 +104,12 @@ type simpleTHash struct {
func (sth simpleTHash) Name() string { return sth.n }
func (sth simpleTHash) Hash() hash.Hash { return sth.h() }
func (ts tarSum) selectHeaders(h *tar.Header, v Version) (set [][2]string) {
for _, elem := range [][2]string{
{"name", h.Name},
{"mode", strconv.Itoa(int(h.Mode))},
{"uid", strconv.Itoa(h.Uid)},
{"gid", strconv.Itoa(h.Gid)},
{"size", strconv.Itoa(int(h.Size))},
{"mtime", strconv.Itoa(int(h.ModTime.UTC().Unix()))},
{"typeflag", string([]byte{h.Typeflag})},
{"linkname", h.Linkname},
{"uname", h.Uname},
{"gname", h.Gname},
{"devmajor", strconv.Itoa(int(h.Devmajor))},
{"devminor", strconv.Itoa(int(h.Devminor))},
} {
if v >= VersionDev && elem[0] == "mtime" {
continue
}
set = append(set, elem)
}
return
}
func (ts *tarSum) encodeHeader(h *tar.Header) error {
for _, elem := range ts.selectHeaders(h, ts.Version()) {
for _, elem := range ts.headerSelector.selectHeaders(h) {
if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
return err
}
}
// include the additional pax headers, from an ordered list
if ts.Version() >= VersionDev {
var keys []string
for k := range h.Xattrs {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
if _, err := ts.h.Write([]byte(k + h.Xattrs[k])); err != nil {
return err
}
}
}
return nil
}

View File

@ -2,7 +2,11 @@ package tarsum
import (
"errors"
"sort"
"strconv"
"strings"
"github.com/docker/docker/vendor/src/code.google.com/p/go/src/pkg/archive/tar"
)
// versioning of the TarSum algorithm
@ -10,11 +14,11 @@ import (
// i.e. "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b"
type Version int
// Prefix of "tarsum"
const (
// Prefix of "tarsum"
Version0 Version = iota
// Prefix of "tarsum.dev"
// NOTE: this variable will be of an unsettled next-version of the TarSum calculation
Version1
// NOTE: this variable will be either the latest or an unsettled next-version of the TarSum calculation
VersionDev
)
@ -28,8 +32,9 @@ func GetVersions() []Version {
}
var tarSumVersions = map[Version]string{
0: "tarsum",
1: "tarsum.dev",
Version0: "tarsum",
Version1: "tarsum.v1",
VersionDev: "tarsum.dev",
}
func (tsv Version) String() string {
@ -50,7 +55,78 @@ func GetVersionFromTarsum(tarsum string) (Version, error) {
return -1, ErrNotVersion
}
// Errors that may be returned by functions in this package
var (
ErrNotVersion = errors.New("string does not include a TarSum Version")
ErrVersionNotImplemented = errors.New("TarSum Version is not yet implemented")
)
// tarHeaderSelector is the interface which different versions
// of tarsum should use for selecting and ordering tar headers
// for each item in the archive.
type tarHeaderSelector interface {
selectHeaders(h *tar.Header) (orderedHeaders [][2]string)
}
type tarHeaderSelectFunc func(h *tar.Header) (orderedHeaders [][2]string)
func (f tarHeaderSelectFunc) selectHeaders(h *tar.Header) (orderedHeaders [][2]string) {
return f(h)
}
func v0TarHeaderSelect(h *tar.Header) (orderedHeaders [][2]string) {
return [][2]string{
{"name", h.Name},
{"mode", strconv.Itoa(int(h.Mode))},
{"uid", strconv.Itoa(h.Uid)},
{"gid", strconv.Itoa(h.Gid)},
{"size", strconv.Itoa(int(h.Size))},
{"mtime", strconv.Itoa(int(h.ModTime.UTC().Unix()))},
{"typeflag", string([]byte{h.Typeflag})},
{"linkname", h.Linkname},
{"uname", h.Uname},
{"gname", h.Gname},
{"devmajor", strconv.Itoa(int(h.Devmajor))},
{"devminor", strconv.Itoa(int(h.Devminor))},
}
}
func v1TarHeaderSelect(h *tar.Header) (orderedHeaders [][2]string) {
// Get extended attributes.
xAttrKeys := make([]string, len(h.Xattrs))
for k := range h.Xattrs {
xAttrKeys = append(xAttrKeys, k)
}
sort.Strings(xAttrKeys)
// Make the slice with enough capacity to hold the 11 basic headers
// we want from the v0 selector plus however many xattrs we have.
orderedHeaders = make([][2]string, 0, 11+len(xAttrKeys))
// Copy all headers from v0 excluding the 'mtime' header (the 5th element).
v0headers := v0TarHeaderSelect(h)
orderedHeaders = append(orderedHeaders, v0headers[0:5]...)
orderedHeaders = append(orderedHeaders, v0headers[6:]...)
// Finally, append the sorted xattrs.
for _, k := range xAttrKeys {
orderedHeaders = append(orderedHeaders, [2]string{k, h.Xattrs[k]})
}
return
}
var registeredHeaderSelectors = map[Version]tarHeaderSelectFunc{
Version0: v0TarHeaderSelect,
Version1: v1TarHeaderSelect,
VersionDev: v1TarHeaderSelect,
}
func getTarHeaderSelector(v Version) (tarHeaderSelector, error) {
headerSelector, ok := registeredHeaderSelectors[v]
if !ok {
return nil, ErrVersionNotImplemented
}
return headerSelector, nil
}

View File

@ -11,11 +11,17 @@ func TestVersion(t *testing.T) {
t.Errorf("expected %q, got %q", expected, v.String())
}
expected = "tarsum.dev"
expected = "tarsum.v1"
v = 1
if v.String() != expected {
t.Errorf("expected %q, got %q", expected, v.String())
}
expected = "tarsum.dev"
v = 2
if v.String() != expected {
t.Errorf("expected %q, got %q", expected, v.String())
}
}
func TestGetVersion(t *testing.T) {