From 9bae4f2f246154507aab9b0c5b779133723888a6 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Thu, 12 Aug 2021 13:57:50 -0700 Subject: [PATCH] Add more optimal MatchesUsingParentResult method, use it in pkg/archive Signed-off-by: Aaron Lehmann --- pkg/archive/archive.go | 25 +++++++++++++++- pkg/fileutils/fileutils.go | 52 ++++++++++++++++++++++++++++----- pkg/fileutils/fileutils_test.go | 38 +++++++++++++++++++----- 3 files changed, 99 insertions(+), 16 deletions(-) diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index 38cba551a8..25ca3959cb 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -817,6 +817,11 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) for _, include := range options.IncludeFiles { rebaseName := options.RebaseNames[include] + var ( + parentMatched []bool + parentDirs []string + ) + walkRoot := getWalkRoot(srcPath, include) filepath.Walk(walkRoot, func(filePath string, f os.FileInfo, err error) error { if err != nil { @@ -843,11 +848,29 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) // is asking for that file no matter what - which is true // for some files, like .dockerignore and Dockerfile (sometimes) if include != relFilePath { - skip, err = pm.Matches(relFilePath) + for len(parentDirs) != 0 { + lastParentDir := parentDirs[len(parentDirs)-1] + if strings.HasPrefix(relFilePath, lastParentDir+string(os.PathSeparator)) { + break + } + parentDirs = parentDirs[:len(parentDirs)-1] + parentMatched = parentMatched[:len(parentMatched)-1] + } + + if len(parentMatched) != 0 { + skip, err = pm.MatchesUsingParentResult(relFilePath, parentMatched[len(parentMatched)-1]) + } else { + skip, err = pm.Matches(relFilePath) + } if err != nil { logrus.Errorf("Error matching %s: %v", relFilePath, err) return err } + + if f.IsDir() { + parentDirs = append(parentDirs, relFilePath) + parentMatched = append(parentMatched, skip) + } } if skip { diff --git a/pkg/fileutils/fileutils.go b/pkg/fileutils/fileutils.go index 31c5f78ec6..b90a5b48ed 100644 --- a/pkg/fileutils/fileutils.go +++ b/pkg/fileutils/fileutils.go @@ -55,8 +55,12 @@ func NewPatternMatcher(patterns []string) (*PatternMatcher, error) { return pm, nil } -// Matches matches path against all the patterns. Matches is not safe to be -// called concurrently +// Matches returns true if "file" matches any of the patterns +// and isn't excluded by any of the subsequent patterns. +// +// The "file" argument should be a slash-delimited path. +// +// Matches is not safe to call concurrently. func (pm *PatternMatcher) Matches(file string) (bool, error) { matched := false file = filepath.FromSlash(file) @@ -64,10 +68,11 @@ func (pm *PatternMatcher) Matches(file string) (bool, error) { parentPathDirs := strings.Split(parentPath, string(os.PathSeparator)) for _, pattern := range pm.patterns { - negative := false - - if pattern.exclusion { - negative = true + // Skip evaluation if this is an inclusion and the filename + // already matched the pattern, or it's an exclusion and it has + // not matched the pattern yet. + if pattern.exclusion != matched { + continue } match, err := pattern.match(file) @@ -86,13 +91,45 @@ func (pm *PatternMatcher) Matches(file string) (bool, error) { } if match { - matched = !negative + matched = !pattern.exclusion } } return matched, nil } +// MatchesUsingParentResult returns true if "file" matches any of the patterns +// and isn't excluded by any of the subsequent patterns. The functionality is +// the same as Matches, but as an optimization, the caller keeps track of +// whether the parent directory matched. +// +// The "file" argument should be a slash-delimited path. +// +// MatchesUsingParentResult is not safe to call concurrently. +func (pm *PatternMatcher) MatchesUsingParentResult(file string, parentMatched bool) (bool, error) { + matched := parentMatched + file = filepath.FromSlash(file) + + for _, pattern := range pm.patterns { + // Skip evaluation if this is an inclusion and the filename + // already matched the pattern, or it's an exclusion and it has + // not matched the pattern yet. + if pattern.exclusion != matched { + continue + } + + match, err := pattern.match(file) + if err != nil { + return false, err + } + + if match { + matched = !pattern.exclusion + } + } + return matched, nil +} + // Exclusions returns true if any of the patterns define exclusions func (pm *PatternMatcher) Exclusions() bool { return pm.exclusions @@ -121,7 +158,6 @@ func (p *Pattern) Exclusion() bool { } func (p *Pattern) match(path string) (bool, error) { - if p.regexp == nil { if err := p.compile(); err != nil { return false, filepath.ErrBadPattern diff --git a/pkg/fileutils/fileutils_test.go b/pkg/fileutils/fileutils_test.go index e11711b653..5147c23597 100644 --- a/pkg/fileutils/fileutils_test.go +++ b/pkg/fileutils/fileutils_test.go @@ -382,13 +382,37 @@ func TestMatches(t *testing.T) { }...) } - for _, test := range tests { - desc := fmt.Sprintf("pattern=%q text=%q", test.pattern, test.text) - pm, err := NewPatternMatcher([]string{test.pattern}) - assert.NilError(t, err, desc) - res, _ := pm.Matches(test.text) - assert.Check(t, is.Equal(test.pass, res), desc) - } + t.Run("Matches", func(t *testing.T) { + for _, test := range tests { + desc := fmt.Sprintf("pattern=%q text=%q", test.pattern, test.text) + pm, err := NewPatternMatcher([]string{test.pattern}) + assert.NilError(t, err, desc) + res, _ := pm.Matches(test.text) + assert.Check(t, is.Equal(test.pass, res), desc) + } + }) + + t.Run("MatchesUsingParentResult", func(t *testing.T) { + for _, test := range tests { + desc := fmt.Sprintf("pattern=%q text=%q", test.pattern, test.text) + pm, err := NewPatternMatcher([]string{test.pattern}) + assert.NilError(t, err, desc) + + parentPath := path.Dir(test.text) + parentPathDirs := strings.Split(parentPath, "/") + + parentMatched := false + if parentPath != "." { + for i := range parentPathDirs { + parentMatched, _ = pm.MatchesUsingParentResult(strings.Join(parentPathDirs[:i+1], "/"), parentMatched) + } + } + + res, _ := pm.MatchesUsingParentResult(test.text, parentMatched) + assert.Check(t, is.Equal(test.pass, res), desc) + } + }) + } func TestCleanPatterns(t *testing.T) {