2015-07-25 04:35:07 -04:00
|
|
|
// Package tarsum provides algorithms to perform checksum calculation on
|
|
|
|
// filesystem layers.
|
|
|
|
//
|
|
|
|
// The transportation of filesystems, regarding Docker, is done with tar(1)
|
|
|
|
// archives. There are a variety of tar serialization formats [2], and a key
|
|
|
|
// concern here is ensuring a repeatable checksum given a set of inputs from a
|
|
|
|
// generic tar archive. Types of transportation include distribution to and from a
|
|
|
|
// registry endpoint, saving and loading through commands or Docker daemon APIs,
|
|
|
|
// transferring the build context from client to Docker daemon, and committing the
|
|
|
|
// filesystem of a container to become an image.
|
|
|
|
//
|
|
|
|
// As tar archives are used for transit, but not preserved in many situations, the
|
|
|
|
// focus of the algorithm is to ensure the integrity of the preserved filesystem,
|
|
|
|
// while maintaining a deterministic accountability. This includes neither
|
|
|
|
// constraining the ordering or manipulation of the files during the creation or
|
|
|
|
// unpacking of the archive, nor including additional metadata state about the file
|
|
|
|
// system attributes.
|
2014-07-30 09:42:12 -04:00
|
|
|
package tarsum
|
2013-07-17 15:13:22 -04:00
|
|
|
|
|
|
|
import (
|
2015-05-01 18:01:10 -04:00
|
|
|
"archive/tar"
|
2013-07-17 15:13:22 -04:00
|
|
|
"bytes"
|
|
|
|
"compress/gzip"
|
2014-12-23 16:40:06 -05:00
|
|
|
"crypto"
|
2013-07-17 15:13:22 -04:00
|
|
|
"crypto/sha256"
|
|
|
|
"encoding/hex"
|
2014-12-23 16:40:06 -05:00
|
|
|
"errors"
|
|
|
|
"fmt"
|
2013-07-17 15:13:22 -04:00
|
|
|
"hash"
|
|
|
|
"io"
|
2016-04-01 13:49:04 -04:00
|
|
|
"path"
|
2013-12-26 19:02:15 -05:00
|
|
|
"strings"
|
2013-07-17 15:13:22 -04:00
|
|
|
)
|
|
|
|
|
2014-09-06 08:29:19 -04:00
|
|
|
// Size classes for the scratch buffer used by (*tarSum).Read. Read rounds the
// scratch buffer up to the smallest class that can hold the caller's buffer,
// and allocates an exact-size buffer only for requests larger than buf32K.
const (
	buf8K  = 8 << 10
	buf16K = 16 << 10
	buf32K = 32 << 10
)
|
|
|
|
|
2014-08-21 16:12:52 -04:00
|
|
|
// NewTarSum creates a new interface for calculating a fixed time checksum of a
|
|
|
|
// tar archive.
|
|
|
|
//
|
|
|
|
// This is used for calculating checksums of layers of an image, in some cases
|
|
|
|
// including the byte payload of the image's json metadata as well, and for
|
|
|
|
// calculating the checksums for buildcache.
|
2014-09-10 21:56:20 -04:00
|
|
|
func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
|
2014-11-19 15:46:03 -05:00
|
|
|
return NewTarSumHash(r, dc, v, DefaultTHash)
|
2014-08-21 16:12:52 -04:00
|
|
|
}
|
|
|
|
|
2015-07-25 04:35:07 -04:00
|
|
|
// NewTarSumHash creates a new TarSum, providing a THash to use rather than
|
|
|
|
// the DefaultTHash.
|
2014-05-15 16:50:58 -04:00
|
|
|
func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
|
2014-10-30 16:47:31 -04:00
|
|
|
headerSelector, err := getTarHeaderSelector(v)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2014-05-15 16:50:58 -04:00
|
|
|
}
|
2014-11-19 15:46:03 -05:00
|
|
|
ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}
|
|
|
|
err = ts.initTarSum()
|
|
|
|
return ts, err
|
2014-05-15 16:50:58 -04:00
|
|
|
}
|
|
|
|
|
2015-07-25 04:35:07 -04:00
|
|
|
// NewTarSumForLabel creates a new TarSum using the provided TarSum version+hash label.
|
2014-12-23 16:40:06 -05:00
|
|
|
func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) {
|
|
|
|
parts := strings.SplitN(label, "+", 2)
|
|
|
|
if len(parts) != 2 {
|
|
|
|
return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}")
|
|
|
|
}
|
|
|
|
|
|
|
|
versionName, hashName := parts[0], parts[1]
|
|
|
|
|
|
|
|
version, ok := tarSumVersionsByName[versionName]
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("unknown TarSum version name: %q", versionName)
|
|
|
|
}
|
|
|
|
|
|
|
|
hashConfig, ok := standardHashConfigs[hashName]
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName)
|
|
|
|
}
|
|
|
|
|
|
|
|
tHash := NewTHash(hashConfig.name, hashConfig.hash.New)
|
|
|
|
|
|
|
|
return NewTarSumHash(r, disableCompression, version, tHash)
|
|
|
|
}
|
|
|
|
|
2014-09-10 21:56:20 -04:00
|
|
|
// TarSum is the generic interface for calculating fixed time
|
2015-07-25 04:35:07 -04:00
|
|
|
// checksums of a tar archive.
|
2014-09-10 21:56:20 -04:00
|
|
|
type TarSum interface {
|
2014-08-21 16:12:52 -04:00
|
|
|
io.Reader
|
2014-09-04 16:13:50 -04:00
|
|
|
GetSums() FileInfoSums
|
2014-08-21 16:12:52 -04:00
|
|
|
Sum([]byte) string
|
|
|
|
Version() Version
|
2014-05-15 16:50:58 -04:00
|
|
|
Hash() THash
|
2014-08-21 16:12:52 -04:00
|
|
|
}
|
|
|
|
|
2015-07-25 04:35:07 -04:00
|
|
|
// tarSum struct is the structure for a Version0 checksum calculation.
|
2014-09-10 21:56:20 -04:00
|
|
|
type tarSum struct {
|
2013-07-17 15:13:22 -04:00
|
|
|
io.Reader
|
2014-01-07 20:46:04 -05:00
|
|
|
tarR *tar.Reader
|
|
|
|
tarW *tar.Writer
|
2014-05-15 16:50:58 -04:00
|
|
|
writer writeCloseFlusher
|
2014-01-07 20:46:04 -05:00
|
|
|
bufTar *bytes.Buffer
|
2014-05-15 16:50:58 -04:00
|
|
|
bufWriter *bytes.Buffer
|
2014-09-06 08:29:19 -04:00
|
|
|
bufData []byte
|
2014-01-07 20:46:04 -05:00
|
|
|
h hash.Hash
|
2014-05-15 16:50:58 -04:00
|
|
|
tHash THash
|
2014-09-04 16:13:50 -04:00
|
|
|
sums FileInfoSums
|
|
|
|
fileCounter int64
|
2014-01-07 20:46:04 -05:00
|
|
|
currentFile string
|
|
|
|
finished bool
|
|
|
|
first bool
|
2014-10-30 16:47:31 -04:00
|
|
|
DisableCompression bool // false by default. When false, the output gzip compressed.
|
|
|
|
tarSumVersion Version // this field is not exported so it can not be mutated during use
|
|
|
|
headerSelector tarHeaderSelector // handles selecting and ordering headers for files in the archive
|
2014-01-07 20:46:04 -05:00
|
|
|
}
|
|
|
|
|
2014-05-15 16:50:58 -04:00
|
|
|
func (ts tarSum) Hash() THash {
|
|
|
|
return ts.tHash
|
|
|
|
}
|
|
|
|
|
2014-09-10 21:56:20 -04:00
|
|
|
func (ts tarSum) Version() Version {
|
2014-08-21 16:12:52 -04:00
|
|
|
return ts.tarSumVersion
|
2014-01-07 20:46:04 -05:00
|
|
|
}
|
|
|
|
|
2015-07-25 04:35:07 -04:00
|
|
|
// THash pairs a hash.Hash generator with the name of the algorithm it
// generates.
type THash interface {
	// Hash returns a hash.Hash to accumulate checksum input into.
	Hash() hash.Hash

	// Name returns the algorithm name, as it appears in checksum strings.
	Name() string
}
|
|
|
|
|
2015-07-25 04:35:07 -04:00
|
|
|
// NewTHash is a convenience method for creating a THash.
|
2014-05-15 16:50:58 -04:00
|
|
|
func NewTHash(name string, h func() hash.Hash) THash {
|
|
|
|
return simpleTHash{n: name, h: h}
|
|
|
|
}
|
|
|
|
|
2014-12-23 16:40:06 -05:00
|
|
|
// tHashConfig describes one hash algorithm that may be named in a TarSum
// label.
type tHashConfig struct {
	// name is the algorithm name handed to NewTHash.
	name string
	// hash identifies the algorithm; its New method supplies the hash.Hash
	// generator handed to NewTHash.
	hash crypto.Hash
}
|
|
|
|
|
|
|
|
var (
|
2015-01-23 12:54:17 -05:00
|
|
|
// NOTE: DO NOT include MD5 or SHA1, which are considered insecure.
|
2014-12-23 16:40:06 -05:00
|
|
|
standardHashConfigs = map[string]tHashConfig{
|
|
|
|
"sha256": {name: "sha256", hash: crypto.SHA256},
|
|
|
|
"sha512": {name: "sha512", hash: crypto.SHA512},
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2015-12-13 11:00:39 -05:00
|
|
|
// DefaultTHash is default TarSum hashing algorithm - "sha256".
|
2014-05-15 16:50:58 -04:00
|
|
|
var DefaultTHash = NewTHash("sha256", sha256.New)
|
|
|
|
|
|
|
|
// simpleTHash is the THash implementation returned by NewTHash: a name paired
// with the constructor for the matching hash.
type simpleTHash struct {
	n string           // algorithm name
	h func() hash.Hash // constructor invoked on every Hash call
}

// Name returns the algorithm name.
func (s simpleTHash) Name() string {
	return s.n
}

// Hash returns the result of calling the stored constructor.
func (s simpleTHash) Hash() hash.Hash {
	newHash := s.h
	return newHash()
}
|
|
|
|
|
2014-09-10 21:56:20 -04:00
|
|
|
func (ts *tarSum) encodeHeader(h *tar.Header) error {
|
2014-10-30 16:47:31 -04:00
|
|
|
for _, elem := range ts.headerSelector.selectHeaders(h) {
|
2013-07-17 15:13:22 -04:00
|
|
|
if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-05-15 16:50:58 -04:00
|
|
|
func (ts *tarSum) initTarSum() error {
|
|
|
|
ts.bufTar = bytes.NewBuffer([]byte{})
|
|
|
|
ts.bufWriter = bytes.NewBuffer([]byte{})
|
|
|
|
ts.tarR = tar.NewReader(ts.Reader)
|
|
|
|
ts.tarW = tar.NewWriter(ts.bufTar)
|
|
|
|
if !ts.DisableCompression {
|
|
|
|
ts.writer = gzip.NewWriter(ts.bufWriter)
|
|
|
|
} else {
|
|
|
|
ts.writer = &nopCloseFlusher{Writer: ts.bufWriter}
|
|
|
|
}
|
|
|
|
if ts.tHash == nil {
|
|
|
|
ts.tHash = DefaultTHash
|
|
|
|
}
|
|
|
|
ts.h = ts.tHash.Hash()
|
|
|
|
ts.h.Reset()
|
|
|
|
ts.first = true
|
|
|
|
ts.sums = FileInfoSums{}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-09-10 21:56:20 -04:00
|
|
|
func (ts *tarSum) Read(buf []byte) (int, error) {
|
2013-07-17 15:13:22 -04:00
|
|
|
if ts.finished {
|
2014-05-15 16:50:58 -04:00
|
|
|
return ts.bufWriter.Read(buf)
|
2013-07-17 15:13:22 -04:00
|
|
|
}
|
2014-09-25 18:58:35 -04:00
|
|
|
if len(ts.bufData) < len(buf) {
|
2014-09-06 08:29:19 -04:00
|
|
|
switch {
|
|
|
|
case len(buf) <= buf8K:
|
|
|
|
ts.bufData = make([]byte, buf8K)
|
|
|
|
case len(buf) <= buf16K:
|
|
|
|
ts.bufData = make([]byte, buf16K)
|
|
|
|
case len(buf) <= buf32K:
|
|
|
|
ts.bufData = make([]byte, buf32K)
|
|
|
|
default:
|
|
|
|
ts.bufData = make([]byte, len(buf))
|
|
|
|
}
|
2014-08-17 07:29:46 -04:00
|
|
|
}
|
2014-09-25 18:58:35 -04:00
|
|
|
buf2 := ts.bufData[:len(buf)]
|
2013-07-17 15:13:22 -04:00
|
|
|
|
|
|
|
n, err := ts.tarR.Read(buf2)
|
|
|
|
if err != nil {
|
|
|
|
if err == io.EOF {
|
|
|
|
if _, err := ts.h.Write(buf2[:n]); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if !ts.first {
|
2014-09-04 16:13:50 -04:00
|
|
|
ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
|
|
|
|
ts.fileCounter++
|
2013-07-17 15:13:22 -04:00
|
|
|
ts.h.Reset()
|
|
|
|
} else {
|
|
|
|
ts.first = false
|
|
|
|
}
|
|
|
|
|
|
|
|
currentHeader, err := ts.tarR.Next()
|
|
|
|
if err != nil {
|
|
|
|
if err == io.EOF {
|
2014-09-16 02:52:06 -04:00
|
|
|
if err := ts.tarW.Close(); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
2014-05-15 16:50:58 -04:00
|
|
|
if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
|
2014-09-16 02:52:06 -04:00
|
|
|
return 0, err
|
|
|
|
}
|
2014-05-15 16:50:58 -04:00
|
|
|
if err := ts.writer.Close(); err != nil {
|
2013-07-17 15:13:22 -04:00
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
ts.finished = true
|
|
|
|
return n, nil
|
|
|
|
}
|
|
|
|
return n, err
|
|
|
|
}
|
2016-04-01 13:49:04 -04:00
|
|
|
ts.currentFile = path.Clean(currentHeader.Name)
|
2013-07-17 15:13:22 -04:00
|
|
|
if err := ts.encodeHeader(currentHeader); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if err := ts.tarW.WriteHeader(currentHeader); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if _, err := ts.tarW.Write(buf2[:n]); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
ts.tarW.Flush()
|
2014-05-15 16:50:58 -04:00
|
|
|
if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
|
2013-07-17 15:13:22 -04:00
|
|
|
return 0, err
|
|
|
|
}
|
2014-05-15 16:50:58 -04:00
|
|
|
ts.writer.Flush()
|
2013-07-17 15:13:22 -04:00
|
|
|
|
2014-05-15 16:50:58 -04:00
|
|
|
return ts.bufWriter.Read(buf)
|
2013-07-17 15:13:22 -04:00
|
|
|
}
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Filling the hash buffer
|
|
|
|
if _, err = ts.h.Write(buf2[:n]); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
2015-12-13 11:00:39 -05:00
|
|
|
// Filling the tar writer
|
2013-07-17 15:13:22 -04:00
|
|
|
if _, err = ts.tarW.Write(buf2[:n]); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
ts.tarW.Flush()
|
|
|
|
|
2014-05-15 16:50:58 -04:00
|
|
|
// Filling the output writer
|
|
|
|
if _, err = io.Copy(ts.writer, ts.bufTar); err != nil {
|
2013-07-17 15:13:22 -04:00
|
|
|
return 0, err
|
|
|
|
}
|
2014-05-15 16:50:58 -04:00
|
|
|
ts.writer.Flush()
|
2013-07-17 15:13:22 -04:00
|
|
|
|
2014-05-15 16:50:58 -04:00
|
|
|
return ts.bufWriter.Read(buf)
|
2013-07-17 15:13:22 -04:00
|
|
|
}
|
|
|
|
|
2014-09-10 21:56:20 -04:00
|
|
|
func (ts *tarSum) Sum(extra []byte) string {
|
2014-09-04 16:13:50 -04:00
|
|
|
ts.sums.SortBySums()
|
2014-05-15 16:50:58 -04:00
|
|
|
h := ts.tHash.Hash()
|
2013-07-22 19:16:31 -04:00
|
|
|
if extra != nil {
|
|
|
|
h.Write(extra)
|
|
|
|
}
|
2014-09-04 16:13:50 -04:00
|
|
|
for _, fis := range ts.sums {
|
|
|
|
h.Write([]byte(fis.Sum()))
|
2013-07-17 15:13:22 -04:00
|
|
|
}
|
2014-05-15 16:50:58 -04:00
|
|
|
checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil))
|
2013-07-17 15:13:22 -04:00
|
|
|
return checksum
|
|
|
|
}
|
2013-12-26 19:01:36 -05:00
|
|
|
|
2014-09-04 16:13:50 -04:00
|
|
|
func (ts *tarSum) GetSums() FileInfoSums {
|
2013-12-26 19:01:36 -05:00
|
|
|
return ts.sums
|
|
|
|
}
|