From b6ef4bc9521346dc5066d71821c6cadfbeced9d3 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Mon, 11 Nov 2013 14:35:29 +0100 Subject: [PATCH] archive.ChangesDirs() - faster and better implementation This replaces the current version with the latest version from the dm branch. Advantages in this version: We only scan each directory tree once, which means less I/O and less chance of container changes between the two scans causing inconsistencies. We avoid comparing some fields for change detection: * Inode * size-in-blocks These can change during a copy operation (e.g. in the dummy backend) without needing to actually reflect a change in content or metadata. * Ctime Any copy operation will create a new Ctime value, and there is no API to change it to the "source" value. * size for directories The size of a directory doesn't have to be the same just because you recreated the same content as another directory. Internal details in the filesystem may make these different with no "real" change. 
--- archive/changes.go | 200 ++++++++++++++++++++++++++++++--------------- 1 file changed, 136 insertions(+), 64 deletions(-) diff --git a/archive/changes.go b/archive/changes.go index a03172115f..95e8589d7b 100644 --- a/archive/changes.go +++ b/archive/changes.go @@ -106,107 +106,179 @@ func Changes(layers []string, rw string) ([]Change, error) { return changes, nil } -func ChangesDirs(newDir, oldDir string) ([]Change, error) { - var changes []Change - err := filepath.Walk(newDir, func(newPath string, f os.FileInfo, err error) error { - if err != nil { - return err - } +type FileInfo struct { + parent *FileInfo + name string + stat syscall.Stat_t + children map[string]*FileInfo +} - var newStat syscall.Stat_t - err = syscall.Lstat(newPath, &newStat) - if err != nil { - return err - } +func (root *FileInfo) LookUp(path string) *FileInfo { + parent := root + if path == "/" { + return root + } - // Rebase path - relPath, err := filepath.Rel(newDir, newPath) - if err != nil { - return err - } - relPath = filepath.Join("/", relPath) - - // Skip root - if relPath == "/" || relPath == "/.docker-id" { - return nil - } - - change := Change{ - Path: relPath, - } - - oldPath := filepath.Join(oldDir, relPath) - - var oldStat = &syscall.Stat_t{} - err = syscall.Lstat(oldPath, oldStat) - if err != nil { - if !os.IsNotExist(err) { - return err + pathElements := strings.Split(path, "/") + for _, elem := range pathElements { + if elem != "" { + child := parent.children[elem] + if child == nil { + return nil } - oldStat = nil + parent = child } + } + return parent +} - if oldStat == nil { - change.Kind = ChangeAdd - changes = append(changes, change) - } else { - if oldStat.Ino != newStat.Ino || - oldStat.Mode != newStat.Mode || +func (info *FileInfo) path() string { + if info.parent == nil { + return "/" + } + return filepath.Join(info.parent.path(), info.name) +} + +func (info *FileInfo) isDir() bool { + return info.parent == nil || info.stat.Mode&syscall.S_IFDIR == 
syscall.S_IFDIR +} + +func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) { + if oldInfo == nil { + // add + change := Change{ + Path: info.path(), + Kind: ChangeAdd, + } + *changes = append(*changes, change) + } + + // We make a copy so we can modify it to detect additions + // also, we only recurse on the old dir if the new info is a directory + // otherwise any previous delete/change is considered recursive + oldChildren := make(map[string]*FileInfo) + if oldInfo != nil && info.isDir() { + for k, v := range oldInfo.children { + oldChildren[k] = v + } + } + + for name, newChild := range info.children { + oldChild, _ := oldChildren[name] + if oldChild != nil { + // change? + oldStat := &oldChild.stat + newStat := &newChild.stat + // Note: We can't compare inode or ctime or blocksize here, because these change + // when copying a file into a container. However, that is not generally a problem + // because any content change will change mtime, and any status change should + // be visible when actually comparing the stat fields. 
The only time this + // breaks down is if some code intentionally hides a change by setting + // back mtime + if oldStat.Mode != newStat.Mode || oldStat.Uid != newStat.Uid || oldStat.Gid != newStat.Gid || oldStat.Rdev != newStat.Rdev || - oldStat.Size != newStat.Size || - oldStat.Blocks != newStat.Blocks || - oldStat.Mtim != newStat.Mtim || - oldStat.Ctim != newStat.Ctim { - change.Kind = ChangeModify - changes = append(changes, change) + // Don't look at size for dirs, its not a good measure of change + (oldStat.Size != newStat.Size && oldStat.Mode&syscall.S_IFDIR != syscall.S_IFDIR) || + oldStat.Mtim != newStat.Mtim { + change := Change{ + Path: newChild.path(), + Kind: ChangeModify, + } + *changes = append(*changes, change) } + + // Remove from copy so we can detect deletions + delete(oldChildren, name) } - return nil - }) - if err != nil { - return nil, err + newChild.addChanges(oldChild, changes) } - err = filepath.Walk(oldDir, func(oldPath string, f os.FileInfo, err error) error { + for _, oldChild := range oldChildren { + // delete + change := Change{ + Path: oldChild.path(), + Kind: ChangeDelete, + } + *changes = append(*changes, change) + } + +} + +func (info *FileInfo) Changes(oldInfo *FileInfo) []Change { + var changes []Change + + info.addChanges(oldInfo, &changes) + + return changes +} + +func newRootFileInfo() *FileInfo { + root := &FileInfo{ + name: "/", + children: make(map[string]*FileInfo), + } + return root +} + +func collectFileInfo(sourceDir string) (*FileInfo, error) { + root := newRootFileInfo() + + err := filepath.Walk(sourceDir, func(path string, f os.FileInfo, err error) error { if err != nil { return err } // Rebase path - relPath, err := filepath.Rel(oldDir, oldPath) + relPath, err := filepath.Rel(sourceDir, path) if err != nil { return err } relPath = filepath.Join("/", relPath) - // Skip root if relPath == "/" { return nil } - change := Change{ - Path: relPath, + parent := root.LookUp(filepath.Dir(relPath)) + if parent == nil { + 
return fmt.Errorf("collectFileInfo: Unexpectedly no parent for %s", relPath) } - newPath := filepath.Join(newDir, relPath) - - var newStat = &syscall.Stat_t{} - err = syscall.Lstat(newPath, newStat) - if err != nil && os.IsNotExist(err) { - change.Kind = ChangeDelete - changes = append(changes, change) + info := &FileInfo{ + name: filepath.Base(relPath), + children: make(map[string]*FileInfo), + parent: parent, } + if err := syscall.Lstat(path, &info.stat); err != nil { + return err + } + + parent.children[info.name] = info + return nil }) if err != nil { return nil, err } - return changes, nil + return root, nil } +// Compare two directories and generate an array of Change objects describing the changes +func ChangesDirs(newDir, oldDir string) ([]Change, error) { + oldRoot, err := collectFileInfo(oldDir) + if err != nil { + return nil, err + } + newRoot, err := collectFileInfo(newDir) + if err != nil { + return nil, err + } + + return newRoot.Changes(oldRoot), nil +} func ExportChanges(root, rw string) (Archive, error) { changes, err := ChangesDirs(root, rw)