Vendor distribution/digest

Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan)
This commit is contained in:
Derek McGowan 2015-03-11 08:17:22 -07:00
parent c858cbffe0
commit 75e29f4550
9 changed files with 744 additions and 0 deletions

View File

@ -68,6 +68,13 @@ if [ "$1" = '--go' ]; then
mv tmp-tar src/code.google.com/p/go/src/pkg/archive/tar
fi
# get digest package from distribution
clone git github.com/docker/distribution 0c130dff5baf3168f2c85630c6d2344b81261269
mv src/github.com/docker/distribution/digest tmp-digest
rm -rf src/github.com/docker/distribution
mkdir -p src/github.com/docker/distribution
mv tmp-digest src/github.com/docker/distribution/digest
clone git github.com/docker/libcontainer 52a8c004ca94cf98f6866536de828c71eb42d1ec
# see src/github.com/docker/libcontainer/update-vendor.sh which is the "source of truth" for libcontainer deps (just like this file)
rm -rf src/github.com/docker/libcontainer/vendor

View File

@ -0,0 +1,168 @@
package digest
import (
"bytes"
"crypto/sha256"
"fmt"
"hash"
"io"
"io/ioutil"
"regexp"
"strings"
"github.com/docker/docker/pkg/tarsum"
)
const (
// DigestTarSumV1EmptyTar is the digest for the empty tar file.
DigestTarSumV1EmptyTar = "tarsum.v1+sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
// DigestSha256EmptyTar is the canonical sha256 digest of empty data
DigestSha256EmptyTar = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
)
// Digest allows simple protection of hex formatted digest strings, prefixed
// by their algorithm. Strings of type Digest have some guarantee of being in
// the correct format and it provides quick access to the components of a
// digest string.
//
// The following is an example of the contents of Digest types:
//
// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc
//
// More important for this code base, this type is compatible with tarsum
// digests. For example, the following would be a valid Digest:
//
// tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b
//
// This allows to abstract the digest behind this type and work only in those
// terms.
type Digest string
// NewDigest returns a Digest from alg and a hash.Hash object.
func NewDigest(alg string, h hash.Hash) Digest {
return Digest(fmt.Sprintf("%s:%x", alg, h.Sum(nil)))
}
// NewDigestFromHex returns a Digest from alg and a the hex encoded digest.
func NewDigestFromHex(alg, hex string) Digest {
return Digest(fmt.Sprintf("%s:%s", alg, hex))
}
// DigestRegexp matches valid digest types.
var DigestRegexp = regexp.MustCompile(`[a-zA-Z0-9-_+.]+:[a-fA-F0-9]+`)
// DigestRegexpAnchored matches valid digest types, anchored to the start and end of the match.
var DigestRegexpAnchored = regexp.MustCompile(`^` + DigestRegexp.String() + `$`)
var (
// ErrDigestInvalidFormat returned when digest format invalid.
ErrDigestInvalidFormat = fmt.Errorf("invalid checksum digest format")
// ErrDigestUnsupported returned when the digest algorithm is unsupported by registry.
ErrDigestUnsupported = fmt.Errorf("unsupported digest algorithm")
)
// ParseDigest parses s and returns the validated digest object. An error will
// be returned if the format is invalid.
func ParseDigest(s string) (Digest, error) {
d := Digest(s)
return d, d.Validate()
}
// FromReader returns the most valid digest for the underlying content.
func FromReader(rd io.Reader) (Digest, error) {
h := sha256.New()
if _, err := io.Copy(h, rd); err != nil {
return "", err
}
return NewDigest("sha256", h), nil
}
// FromTarArchive produces a tarsum digest from reader rd.
func FromTarArchive(rd io.Reader) (Digest, error) {
ts, err := tarsum.NewTarSum(rd, true, tarsum.Version1)
if err != nil {
return "", err
}
if _, err := io.Copy(ioutil.Discard, ts); err != nil {
return "", err
}
d, err := ParseDigest(ts.Sum(nil))
if err != nil {
return "", err
}
return d, nil
}
// FromBytes digests the input and returns a Digest.
func FromBytes(p []byte) (Digest, error) {
return FromReader(bytes.NewReader(p))
}
// Validate checks that the contents of d is a valid digest, returning an
// error if not.
func (d Digest) Validate() error {
s := string(d)
// Common case will be tarsum
_, err := ParseTarSum(s)
if err == nil {
return nil
}
// Continue on for general parser
if !DigestRegexpAnchored.MatchString(s) {
return ErrDigestInvalidFormat
}
i := strings.Index(s, ":")
if i < 0 {
return ErrDigestInvalidFormat
}
// case: "sha256:" with no hex.
if i+1 == len(s) {
return ErrDigestInvalidFormat
}
switch s[:i] {
case "sha256", "sha384", "sha512":
break
default:
return ErrDigestUnsupported
}
return nil
}
// Algorithm returns the algorithm portion of the digest. This will panic if
// the underlying digest is not in a valid format.
func (d Digest) Algorithm() string {
return string(d[:d.sepIndex()])
}
// Hex returns the hex digest portion of the digest. This will panic if the
// underlying digest is not in a valid format.
func (d Digest) Hex() string {
return string(d[d.sepIndex()+1:])
}
func (d Digest) String() string {
return string(d)
}
func (d Digest) sepIndex() int {
i := strings.Index(string(d), ":")
if i < 0 {
panic("could not find ':' in digest: " + d)
}
return i
}

View File

@ -0,0 +1,111 @@
package digest
import (
"bytes"
"io"
"testing"
)
func TestParseDigest(t *testing.T) {
for _, testcase := range []struct {
input string
err error
algorithm string
hex string
}{
{
input: "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
algorithm: "tarsum+sha256",
hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
},
{
input: "tarsum.dev+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
algorithm: "tarsum.dev+sha256",
hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
},
{
input: "tarsum.v1+sha256:220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e",
algorithm: "tarsum.v1+sha256",
hex: "220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e",
},
{
input: "sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
algorithm: "sha256",
hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
},
{
input: "sha384:d3fc7881460b7e22e3d172954463dddd7866d17597e7248453c48b3e9d26d9596bf9c4a9cf8072c9d5bad76e19af801d",
algorithm: "sha384",
hex: "d3fc7881460b7e22e3d172954463dddd7866d17597e7248453c48b3e9d26d9596bf9c4a9cf8072c9d5bad76e19af801d",
},
{
// empty hex
input: "sha256:",
err: ErrDigestInvalidFormat,
},
{
// just hex
input: "d41d8cd98f00b204e9800998ecf8427e",
err: ErrDigestInvalidFormat,
},
{
// not hex
input: "sha256:d41d8cd98f00b204e9800m98ecf8427e",
err: ErrDigestInvalidFormat,
},
{
input: "foo:d41d8cd98f00b204e9800998ecf8427e",
err: ErrDigestUnsupported,
},
} {
digest, err := ParseDigest(testcase.input)
if err != testcase.err {
t.Fatalf("error differed from expected while parsing %q: %v != %v", testcase.input, err, testcase.err)
}
if testcase.err != nil {
continue
}
if digest.Algorithm() != testcase.algorithm {
t.Fatalf("incorrect algorithm for parsed digest: %q != %q", digest.Algorithm(), testcase.algorithm)
}
if digest.Hex() != testcase.hex {
t.Fatalf("incorrect hex for parsed digest: %q != %q", digest.Hex(), testcase.hex)
}
// Parse string return value and check equality
newParsed, err := ParseDigest(digest.String())
if err != nil {
t.Fatalf("unexpected error parsing input %q: %v", testcase.input, err)
}
if newParsed != digest {
t.Fatalf("expected equal: %q != %q", newParsed, digest)
}
}
}
// A few test cases used to fix behavior we expect in storage backend.
func TestFromTarArchiveZeroLength(t *testing.T) {
checkTarsumDigest(t, "zero-length archive", bytes.NewReader([]byte{}), DigestTarSumV1EmptyTar)
}
func TestFromTarArchiveEmptyTar(t *testing.T) {
// String of 1024 zeros is a valid, empty tar file.
checkTarsumDigest(t, "1024 zero bytes", bytes.NewReader(bytes.Repeat([]byte("\x00"), 1024)), DigestTarSumV1EmptyTar)
}
func checkTarsumDigest(t *testing.T, msg string, rd io.Reader, expected Digest) {
dgst, err := FromTarArchive(rd)
if err != nil {
t.Fatalf("unexpected error digesting %s: %v", msg, err)
}
if dgst != expected {
t.Fatalf("unexpected digest for %s: %q != %q", msg, dgst, expected)
}
}

View File

@ -0,0 +1,44 @@
package digest
import (
"crypto/sha256"
"hash"
)
// Digester calculates the digest of written data. It is functionally
// equivalent to hash.Hash but provides methods for returning the Digest type
// rather than raw bytes.
type Digester struct {
alg string
hash hash.Hash
}
// NewDigester create a new Digester with the given hashing algorithm and instance
// of that algo's hasher.
func NewDigester(alg string, h hash.Hash) Digester {
return Digester{
alg: alg,
hash: h,
}
}
// NewCanonicalDigester is a convenience function to create a new Digester with
// out default settings.
func NewCanonicalDigester() Digester {
return NewDigester("sha256", sha256.New())
}
// Write data to the digester. These writes cannot fail.
func (d *Digester) Write(p []byte) (n int, err error) {
return d.hash.Write(p)
}
// Digest returns the current digest for this digester.
func (d *Digester) Digest() Digest {
return NewDigest(d.alg, d.hash)
}
// Reset the state of the digester.
func (d *Digester) Reset() {
d.hash.Reset()
}

View File

@ -0,0 +1,52 @@
// Package digest provides a generalized type to opaquely represent message
// digests and their operations within the registry. The Digest type is
// designed to serve as a flexible identifier in a content-addressable system.
// More importantly, it provides tools and wrappers to work with tarsums and
// hash.Hash-based digests with little effort.
//
// Basics
//
// The format of a digest is simply a string with two parts, dubbed the
// "algorithm" and the "digest", separated by a colon:
//
// <algorithm>:<digest>
//
// An example of a sha256 digest representation follows:
//
// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc
//
// In this case, the string "sha256" is the algorithm and the hex bytes are
// the "digest". A tarsum example will be more illustrative of the use case
// involved in the registry:
//
// tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b
//
// For this, we consider the algorithm to be "tarsum+sha256". Prudent
// applications will favor the ParseDigest function to verify the format over
// using simple type casts. However, a normal string can be cast as a digest
// with a simple type conversion:
//
// Digest("tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b")
//
// Because the Digest type is simply a string, once a valid Digest is
// obtained, comparisons are cheap, quick and simple to express with the
// standard equality operator.
//
// Verification
//
// The main benefit of using the Digest type is simple verification against a
// given digest. The Verifier interface, modeled after the stdlib hash.Hash
// interface, provides a common write sink for digest verification. After
// writing is complete, calling the Verifier.Verified method will indicate
// whether or not the stream of bytes matches the target digest.
//
// Missing Features
//
// In addition to the above, we intend to add the following features to this
// package:
//
// 1. A Digester type that supports write sink digest calculation.
//
// 2. Suspend and resume of ongoing digest calculations to support efficient digest verification in the registry.
//
package digest

View File

@ -0,0 +1,70 @@
package digest
import (
"fmt"
"regexp"
)
// TarSumRegexp defines a reguler expression to match tarsum identifiers.
var TarsumRegexp = regexp.MustCompile("tarsum(?:.[a-z0-9]+)?\\+[a-zA-Z0-9]+:[A-Fa-f0-9]+")
// TarsumRegexpCapturing defines a reguler expression to match tarsum identifiers with
// capture groups corresponding to each component.
var TarsumRegexpCapturing = regexp.MustCompile("(tarsum)(.([a-z0-9]+))?\\+([a-zA-Z0-9]+):([A-Fa-f0-9]+)")
// TarSumInfo contains information about a parsed tarsum.
type TarSumInfo struct {
// Version contains the version of the tarsum.
Version string
// Algorithm contains the algorithm for the final digest
Algorithm string
// Digest contains the hex-encoded digest.
Digest string
}
// InvalidTarSumError provides informations about a TarSum that cannot be parsed
// by ParseTarSum.
type InvalidTarSumError string
func (e InvalidTarSumError) Error() string {
return fmt.Sprintf("invalid tarsum: %q", string(e))
}
// ParseTarSum parses a tarsum string into its components of interest. For
// example, this method may receive the tarsum in the following format:
//
// tarsum.v1+sha256:220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e
//
// The function will return the following:
//
// TarSumInfo{
// Version: "v1",
// Algorithm: "sha256",
// Digest: "220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e",
// }
//
func ParseTarSum(tarSum string) (tsi TarSumInfo, err error) {
components := TarsumRegexpCapturing.FindStringSubmatch(tarSum)
if len(components) != 1+TarsumRegexpCapturing.NumSubexp() {
return TarSumInfo{}, InvalidTarSumError(tarSum)
}
return TarSumInfo{
Version: components[3],
Algorithm: components[4],
Digest: components[5],
}, nil
}
// String returns the valid, string representation of the tarsum info.
func (tsi TarSumInfo) String() string {
if tsi.Version == "" {
return fmt.Sprintf("tarsum+%s:%s", tsi.Algorithm, tsi.Digest)
}
return fmt.Sprintf("tarsum.%s+%s:%s", tsi.Version, tsi.Algorithm, tsi.Digest)
}

View File

@ -0,0 +1,79 @@
package digest
import (
"reflect"
"testing"
)
func TestParseTarSumComponents(t *testing.T) {
for _, testcase := range []struct {
input string
expected TarSumInfo
err error
}{
{
input: "tarsum.v1+sha256:220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e",
expected: TarSumInfo{
Version: "v1",
Algorithm: "sha256",
Digest: "220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e",
},
},
{
input: "",
err: InvalidTarSumError(""),
},
{
input: "purejunk",
err: InvalidTarSumError("purejunk"),
},
{
input: "tarsum.v23+test:12341234123412341effefefe",
expected: TarSumInfo{
Version: "v23",
Algorithm: "test",
Digest: "12341234123412341effefefe",
},
},
// The following test cases are ported from docker core
{
// Version 0 tarsum
input: "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
expected: TarSumInfo{
Algorithm: "sha256",
Digest: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
},
},
{
// Dev version tarsum
input: "tarsum.dev+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
expected: TarSumInfo{
Version: "dev",
Algorithm: "sha256",
Digest: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
},
},
} {
tsi, err := ParseTarSum(testcase.input)
if err != nil {
if testcase.err != nil && err == testcase.err {
continue // passes
}
t.Fatalf("unexpected error parsing tarsum: %v", err)
}
if testcase.err != nil {
t.Fatalf("expected error not encountered on %q: %v", testcase.input, testcase.err)
}
if !reflect.DeepEqual(tsi, testcase.expected) {
t.Fatalf("expected tarsum info: %v != %v", tsi, testcase.expected)
}
if testcase.input != tsi.String() {
t.Fatalf("input should equal output: %q != %q", tsi.String(), testcase.input)
}
}
}

View File

@ -0,0 +1,134 @@
package digest
import (
"crypto/sha256"
"crypto/sha512"
"hash"
"io"
"io/ioutil"
"github.com/docker/docker/pkg/tarsum"
)
// Verifier presents a general verification interface to be used with message
// digests and other byte stream verifications. Users instantiate a Verifier
// from one of the various methods, write the data under test to it then check
// the result with the Verified method.
type Verifier interface {
io.Writer
// Verified will return true if the content written to Verifier matches
// the digest.
Verified() bool
}
// NewDigestVerifier returns a verifier that compares the written bytes
// against a passed in digest.
func NewDigestVerifier(d Digest) (Verifier, error) {
if err := d.Validate(); err != nil {
return nil, err
}
alg := d.Algorithm()
switch alg {
case "sha256", "sha384", "sha512":
return hashVerifier{
hash: newHash(alg),
digest: d,
}, nil
default:
// Assume we have a tarsum.
version, err := tarsum.GetVersionFromTarsum(string(d))
if err != nil {
return nil, err
}
pr, pw := io.Pipe()
// TODO(stevvooe): We may actually want to ban the earlier versions of
// tarsum. That decision may not be the place of the verifier.
ts, err := tarsum.NewTarSum(pr, true, version)
if err != nil {
return nil, err
}
// TODO(sday): Ick! A goroutine per digest verification? We'll have to
// get the tarsum library to export an io.Writer variant.
go func() {
io.Copy(ioutil.Discard, ts)
pw.Close()
}()
return &tarsumVerifier{
digest: d,
ts: ts,
pr: pr,
pw: pw,
}, nil
}
}
// NewLengthVerifier returns a verifier that returns true when the number of
// read bytes equals the expected parameter.
func NewLengthVerifier(expected int64) Verifier {
return &lengthVerifier{
expected: expected,
}
}
type lengthVerifier struct {
expected int64 // expected bytes read
len int64 // bytes read
}
func (lv *lengthVerifier) Write(p []byte) (n int, err error) {
n = len(p)
lv.len += int64(n)
return n, err
}
func (lv *lengthVerifier) Verified() bool {
return lv.expected == lv.len
}
func newHash(name string) hash.Hash {
switch name {
case "sha256":
return sha256.New()
case "sha384":
return sha512.New384()
case "sha512":
return sha512.New()
default:
panic("unsupport algorithm: " + name)
}
}
type hashVerifier struct {
digest Digest
hash hash.Hash
}
func (hv hashVerifier) Write(p []byte) (n int, err error) {
return hv.hash.Write(p)
}
func (hv hashVerifier) Verified() bool {
return hv.digest == NewDigest(hv.digest.Algorithm(), hv.hash)
}
type tarsumVerifier struct {
digest Digest
ts tarsum.TarSum
pr *io.PipeReader
pw *io.PipeWriter
}
func (tv *tarsumVerifier) Write(p []byte) (n int, err error) {
return tv.pw.Write(p)
}
func (tv *tarsumVerifier) Verified() bool {
return tv.digest == Digest(tv.ts.Sum(nil))
}

View File

@ -0,0 +1,79 @@
package digest
import (
"bytes"
"crypto/rand"
"io"
"os"
"testing"
"github.com/docker/distribution/testutil"
)
func TestDigestVerifier(t *testing.T) {
p := make([]byte, 1<<20)
rand.Read(p)
digest, err := FromBytes(p)
if err != nil {
t.Fatalf("unexpected error digesting bytes: %#v", err)
}
verifier, err := NewDigestVerifier(digest)
if err != nil {
t.Fatalf("unexpected error getting digest verifier: %s", err)
}
io.Copy(verifier, bytes.NewReader(p))
if !verifier.Verified() {
t.Fatalf("bytes not verified")
}
tf, tarSum, err := testutil.CreateRandomTarFile()
if err != nil {
t.Fatalf("error creating tarfile: %v", err)
}
digest, err = FromTarArchive(tf)
if err != nil {
t.Fatalf("error digesting tarsum: %v", err)
}
if digest.String() != tarSum {
t.Fatalf("unexpected digest: %q != %q", digest.String(), tarSum)
}
expectedSize, _ := tf.Seek(0, os.SEEK_END) // Get tar file size
tf.Seek(0, os.SEEK_SET) // seek back
// This is the most relevant example for the registry application. It's
// effectively a read through pipeline, where the final sink is the digest
// verifier.
verifier, err = NewDigestVerifier(digest)
if err != nil {
t.Fatalf("unexpected error getting digest verifier: %s", err)
}
lengthVerifier := NewLengthVerifier(expectedSize)
rd := io.TeeReader(tf, lengthVerifier)
io.Copy(verifier, rd)
if !lengthVerifier.Verified() {
t.Fatalf("verifier detected incorrect length")
}
if !verifier.Verified() {
t.Fatalf("bytes not verified")
}
}
// TODO(stevvooe): Add benchmarks to measure bytes/second throughput for
// DigestVerifier. We should be tarsum/gzip limited for common cases but we
// want to verify this.
//
// The relevant benchmarks for comparison can be run with the following
// commands:
//
// go test -bench . crypto/sha1
// go test -bench . github.com/docker/docker/pkg/tarsum
//