From 360078d7613e1939c6d2f949ccac14c6ab9d568e Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Thu, 26 Dec 2013 12:27:51 -0800 Subject: [PATCH 1/5] Remove old debug from tarsum --- utils/tarsum.go | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/utils/tarsum.go b/utils/tarsum.go index 9cba516a34..c441c25766 100644 --- a/utils/tarsum.go +++ b/utils/tarsum.go @@ -1,26 +1,17 @@ package utils import ( + "archive/tar" "bytes" "compress/gzip" "crypto/sha256" "encoding/hex" - "archive/tar" "hash" "io" "sort" "strconv" ) -type verboseHash struct { - hash.Hash -} - -func (h verboseHash) Write(buf []byte) (int, error) { - Debugf("--->%s<---", buf) - return h.Hash.Write(buf) -} - type TarSum struct { io.Reader tarR *tar.Reader @@ -29,7 +20,6 @@ type TarSum struct { bufTar *bytes.Buffer bufGz *bytes.Buffer h hash.Hash - h2 verboseHash sums []string finished bool first bool @@ -52,7 +42,6 @@ func (ts *TarSum) encodeHeader(h *tar.Header) error { // {"atime", strconv.Itoa(int(h.AccessTime.UTC().Unix()))}, // {"ctime", strconv.Itoa(int(h.ChangeTime.UTC().Unix()))}, } { - // Debugf("-->%s<-- -->%s<--", elem[0], elem[1]) if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil { return err } @@ -68,7 +57,6 @@ func (ts *TarSum) Read(buf []byte) (int, error) { ts.tarW = tar.NewWriter(ts.bufTar) ts.gz = gzip.NewWriter(ts.bufGz) ts.h = sha256.New() - // ts.h = verboseHash{sha256.New()} ts.h.Reset() ts.first = true } From 1d4b7d8fa1af95ce83f263a49ed24e686fa7cb62 Mon Sep 17 00:00:00 2001 From: "Guillaume J. 
Charmes" Date: Thu, 26 Dec 2013 15:43:27 -0800 Subject: [PATCH 2/5] Make sure the cache lookup always returns the same result --- server.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/server.go b/server.go index 9f6842dbae..02486f9b5e 100644 --- a/server.go +++ b/server.go @@ -21,6 +21,7 @@ import ( "path" "path/filepath" "runtime" + "sort" "strconv" "strings" "sync" @@ -1694,16 +1695,13 @@ func (srv *Server) ImageGetCached(imgID string, config *Config) (*Image, error) } // Store the tree in a map of map (map[parentId][childId]) - imageMap := make(map[string]map[string]struct{}) + imageMap := make(map[string][]string) for _, img := range images { - if _, exists := imageMap[img.Parent]; !exists { - imageMap[img.Parent] = make(map[string]struct{}) - } - imageMap[img.Parent][img.ID] = struct{}{} + imageMap[img.Parent] = append(imageMap[img.Parent], img.ID) } - + sort.Strings(imageMap[imgID]) // Loop on the children of the given image and check the config - for elem := range imageMap[imgID] { + for _, elem := range imageMap[imgID] { img, err := srv.runtime.graph.Get(elem) if err != nil { return nil, err From fc9f4d8bad975ffe060eb741b38c5502dd9886a4 Mon Sep 17 00:00:00 2001 From: "Guillaume J. 
Charmes" Date: Thu, 26 Dec 2013 16:01:36 -0800 Subject: [PATCH 3/5] Log files name along with their checksum in TarSum + add a Method to retrieve the checksum map --- utils/tarsum.go | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/utils/tarsum.go b/utils/tarsum.go index c441c25766..ab57d54110 100644 --- a/utils/tarsum.go +++ b/utils/tarsum.go @@ -14,15 +14,16 @@ import ( type TarSum struct { io.Reader - tarR *tar.Reader - tarW *tar.Writer - gz *gzip.Writer - bufTar *bytes.Buffer - bufGz *bytes.Buffer - h hash.Hash - sums []string - finished bool - first bool + tarR *tar.Reader + tarW *tar.Writer + gz *gzip.Writer + bufTar *bytes.Buffer + bufGz *bytes.Buffer + h hash.Hash + sums map[string]string + currentFile string + finished bool + first bool } func (ts *TarSum) encodeHeader(h *tar.Header) error { @@ -59,6 +60,7 @@ func (ts *TarSum) Read(buf []byte) (int, error) { ts.h = sha256.New() ts.h.Reset() ts.first = true + ts.sums = make(map[string]string) } if ts.finished { @@ -73,7 +75,7 @@ func (ts *TarSum) Read(buf []byte) (int, error) { return 0, err } if !ts.first { - ts.sums = append(ts.sums, hex.EncodeToString(ts.h.Sum(nil))) + ts.sums[ts.currentFile] = hex.EncodeToString(ts.h.Sum(nil)) ts.h.Reset() } else { ts.first = false @@ -131,12 +133,17 @@ func (ts *TarSum) Read(buf []byte) (int, error) { } func (ts *TarSum) Sum(extra []byte) string { - sort.Strings(ts.sums) + var sums []string + + for _, sum := range ts.sums { + sums = append(sums, sum) + } + sort.Strings(sums) h := sha256.New() if extra != nil { h.Write(extra) } - for _, sum := range ts.sums { + for _, sum := range sums { Debugf("-->%s<--", sum) h.Write([]byte(sum)) } @@ -144,3 +151,7 @@ func (ts *TarSum) Sum(extra []byte) string { Debugf("checksum processed: %s", checksum) return checksum } + +func (ts *TarSum) GetSums() map[string]string { + return ts.sums +} From 894d4a23fba1a67087ae5bbe9c50c56e6dc09289 Mon Sep 17 00:00:00 2001 From: "Guillaume J. 
Charmes" Date: Thu, 26 Dec 2013 16:02:15 -0800 Subject: [PATCH 4/5] Change BuildFile in order to use TarSum instead of custom checksum --- buildfile.go | 132 ++++++++++++++++-------------------------------- utils/tarsum.go | 2 + 2 files changed, 45 insertions(+), 89 deletions(-) diff --git a/buildfile.go b/buildfile.go index fb97f26dcc..aab5c24eff 100644 --- a/buildfile.go +++ b/buildfile.go @@ -1,9 +1,6 @@ package docker import ( - "archive/tar" - "crypto/sha256" - "encoding/hex" "encoding/json" "errors" "fmt" @@ -18,8 +15,8 @@ import ( "path/filepath" "reflect" "regexp" + "sort" "strings" - "time" ) var ( @@ -36,10 +33,13 @@ type buildFile struct { runtime *Runtime srv *Server - image string - maintainer string - config *Config - context string + image string + maintainer string + config *Config + + contextPath string + context *utils.TarSum + verbose bool utilizeCache bool rm bool @@ -118,66 +118,6 @@ func (b *buildFile) probeCache() (bool, error) { return false, nil } -// hashPath calculates a strong hash (sha256) value for a file tree located -// at `basepth`/`pth`, including all attributes that would normally be -// captured by `tar`. The path to hash is passed in two pieces only to -// permit logging the second piece in isolation, assuming the first is a -// temporary directory in which docker is running. If `clobberTimes` is -// true and hashPath is applied to a single file, the ctime/atime/mtime of -// the file is considered to be unix time 0, for purposes of hashing. 
-func (b *buildFile) hashPath(basePth, pth string, clobberTimes bool) (string, error) { - - p := path.Join(basePth, pth) - - st, err := os.Stat(p) - if err != nil { - return "", err - } - - h := sha256.New() - - if st.IsDir() { - tarRd, err := archive.Tar(p, archive.Uncompressed) - if err != nil { - return "", err - } - _, err = io.Copy(h, tarRd) - if err != nil { - return "", err - } - - } else { - hdr, err := tar.FileInfoHeader(st, "") - if err != nil { - return "", err - } - if clobberTimes { - hdr.AccessTime = time.Unix(0, 0) - hdr.ChangeTime = time.Unix(0, 0) - hdr.ModTime = time.Unix(0, 0) - } - hdr.Name = filepath.Base(p) - tarWr := tar.NewWriter(h) - if err := tarWr.WriteHeader(hdr); err != nil { - return "", err - } - - fileRd, err := os.Open(p) - if err != nil { - return "", err - } - - if _, err = io.Copy(tarWr, fileRd); err != nil { - return "", err - } - tarWr.Close() - } - - hstr := hex.EncodeToString(h.Sum(nil)) - fmt.Fprintf(b.outStream, " ---> data at %s has sha256 %.12s...\n", pth, hstr) - return hstr, nil -} - func (b *buildFile) CmdRun(args string) error { if b.image == "" { return fmt.Errorf("Please provide a source image with `from` prior to run") @@ -347,8 +287,8 @@ func (b *buildFile) CmdVolume(args string) error { } func (b *buildFile) checkPathForAddition(orig string) error { - origPath := path.Join(b.context, orig) - if !strings.HasPrefix(origPath, b.context) { + origPath := path.Join(b.contextPath, orig) + if !strings.HasPrefix(origPath, b.contextPath) { return fmt.Errorf("Forbidden path outside the build context: %s (%s)", orig, origPath) } _, err := os.Stat(origPath) @@ -359,8 +299,10 @@ func (b *buildFile) checkPathForAddition(orig string) error { } func (b *buildFile) addContext(container *Container, orig, dest string) error { - origPath := path.Join(b.context, orig) - destPath := path.Join(container.RootfsPath(), dest) + var ( + origPath = path.Join(b.contextPath, orig) + destPath = path.Join(container.RootfsPath(), dest) + ) // 
Preserve the trailing '/' if strings.HasSuffix(dest, "/") { destPath = destPath + "/" @@ -388,7 +330,7 @@ func (b *buildFile) addContext(container *Container, orig, dest string) error { } func (b *buildFile) CmdAdd(args string) error { - if b.context == "" { + if b.context == nil { return fmt.Errorf("No context given. Impossible to use ADD") } tmp := strings.SplitN(args, " ", 2) @@ -408,22 +350,20 @@ func (b *buildFile) CmdAdd(args string) error { cmd := b.config.Cmd b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", orig, dest)} - b.config.Image = b.image - origPath := orig - destPath := dest - clobberTimes := false + // FIXME: do we really need this? + var ( + origPath = orig + destPath = dest + ) if utils.IsURL(orig) { - - clobberTimes = true - resp, err := utils.Download(orig) if err != nil { return err } - tmpDirName, err := ioutil.TempDir(b.context, "docker-remote") + tmpDirName, err := ioutil.TempDir(b.contextPath, "docker-remote") if err != nil { return err } @@ -464,9 +404,23 @@ func (b *buildFile) CmdAdd(args string) error { // Hash path and check the cache if b.utilizeCache { - hash, err := b.hashPath(b.context, origPath, clobberTimes) - if err != nil { + var ( + hash string + sums = b.context.GetSums() + ) + if fi, err := os.Stat(path.Join(b.contextPath, origPath)); err != nil { return err + } else if fi.IsDir() { + var subfiles []string + for file, sum := range sums { + if strings.HasPrefix(file, origPath) { + subfiles = append(subfiles, sum) + } + } + sort.Strings(subfiles) + hash = strings.Join(subfiles, ",") + } else { + hash = sums[origPath] } b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", hash, dest)} hit, err := b.probeCache() @@ -635,17 +589,17 @@ func (b *buildFile) commit(id string, autoCmd []string, comment string) error { var lineContinuation = regexp.MustCompile(`\s*\\\s*\n`) func (b *buildFile) Build(context io.Reader) (string, error) { - // FIXME: @creack "name" is a terrible variable 
name - name, err := ioutil.TempDir("", "docker-build") + tmpdirPath, err := ioutil.TempDir("", "docker-build") if err != nil { return "", err } - if err := archive.Untar(context, name, nil); err != nil { + b.context = &utils.TarSum{Reader: context} + if err := archive.Untar(b.context, tmpdirPath, nil); err != nil { return "", err } - defer os.RemoveAll(name) - b.context = name - filename := path.Join(name, "Dockerfile") + defer os.RemoveAll(tmpdirPath) + b.contextPath = tmpdirPath + filename := path.Join(tmpdirPath, "Dockerfile") if _, err := os.Stat(filename); os.IsNotExist(err) { return "", fmt.Errorf("Can't build a directory with no Dockerfile") } diff --git a/utils/tarsum.go b/utils/tarsum.go index ab57d54110..7de3dbe5be 100644 --- a/utils/tarsum.go +++ b/utils/tarsum.go @@ -10,6 +10,7 @@ import ( "io" "sort" "strconv" + "strings" ) type TarSum struct { @@ -92,6 +93,7 @@ func (ts *TarSum) Read(buf []byte) (int, error) { } return n, err } + ts.currentFile = strings.TrimSuffix(strings.TrimPrefix(currentHeader.Name, "./"), "/") if err := ts.encodeHeader(currentHeader); err != nil { return 0, err } From cd735496da6d977748d5c3b19976f42d2cc1933e Mon Sep 17 00:00:00 2001 From: "Guillaume J. 
Charmes" Date: Thu, 26 Dec 2013 16:42:05 -0800 Subject: [PATCH 5/5] Hash the sums for directory (unreadable when there are too many) --- buildfile.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/buildfile.go b/buildfile.go index aab5c24eff..4f72a73520 100644 --- a/buildfile.go +++ b/buildfile.go @@ -1,6 +1,8 @@ package docker import ( + "crypto/sha256" + "encoding/hex" "encoding/json" "errors" "fmt" @@ -418,9 +420,11 @@ func (b *buildFile) CmdAdd(args string) error { } } sort.Strings(subfiles) - hash = strings.Join(subfiles, ",") + hasher := sha256.New() + hasher.Write([]byte(strings.Join(subfiles, ","))) + hash = "dir:" + hex.EncodeToString(hasher.Sum(nil)) } else { - hash = sums[origPath] + hash = "file:" + sums[origPath] } b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", hash, dest)} hit, err := b.probeCache()