From adeb3e36840c6d9d1912dd78f5300ae74d11c068 Mon Sep 17 00:00:00 2001 From: Tonis Tiigi Date: Wed, 8 Mar 2017 14:23:25 -0800 Subject: [PATCH] Fix inefficient file paths filter Signed-off-by: Tonis Tiigi --- integration-cli/docker_cli_build_test.go | 2 +- pkg/archive/archive.go | 14 +- pkg/fileutils/fileutils.go | 193 ++++++++++++----------- pkg/fileutils/fileutils_test.go | 85 +++++----- 4 files changed, 153 insertions(+), 141 deletions(-) diff --git a/integration-cli/docker_cli_build_test.go b/integration-cli/docker_cli_build_test.go index d774c9111a..c4396d82c4 100644 --- a/integration-cli/docker_cli_build_test.go +++ b/integration-cli/docker_cli_build_test.go @@ -2403,7 +2403,7 @@ func (s *DockerSuite) TestBuildDockerignoringBadExclusion(c *check.C) { withFile(".dockerignore", "!\n"), )).Assert(c, icmd.Expected{ ExitCode: 1, - Err: "Error checking context: 'Illegal exclusion pattern: !", + Err: "Error checking context: 'illegal exclusion pattern: \"!\"", }) } diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index 1b7b1b04f9..45e3ad8954 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -553,8 +553,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) // on platforms other than Windows. srcPath = fixVolumePathPrefix(srcPath) - patterns, patDirs, exceptions, err := fileutils.CleanPatterns(options.ExcludePatterns) - + pm, err := fileutils.NewPatternMatcher(options.ExcludePatterns) if err != nil { return nil, err } @@ -651,7 +650,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) // is asking for that file no matter what - which is true // for some files, like .dockerignore and Dockerfile (sometimes) if include != relFilePath { - skip, err = fileutils.OptimizedMatches(relFilePath, patterns, patDirs) + skip, err = pm.Matches(relFilePath) if err != nil { logrus.Errorf("Error matching %s: %v", relFilePath, err) return err @@ -670,18 +669,17 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) } // No exceptions (!...) in patterns so just skip dir - if !exceptions { + if !pm.Exclusions() { return filepath.SkipDir } dirSlash := relFilePath + string(filepath.Separator) - for _, pat := range patterns { - if pat[0] != '!' { + for _, pat := range pm.Patterns() { + if !pat.Exclusion() { continue } - pat = pat[1:] + string(filepath.Separator) - if strings.HasPrefix(pat, dirSlash) { + if strings.HasPrefix(pat.String()+string(filepath.Separator), dirSlash) { // found a match - so can't skip this dir return nil } diff --git a/pkg/fileutils/fileutils.go b/pkg/fileutils/fileutils.go index aeb9908a3c..57cc087340 100644 --- a/pkg/fileutils/fileutils.go +++ b/pkg/fileutils/fileutils.go @@ -13,98 +13,74 @@ import ( "github.com/Sirupsen/logrus" ) -// exclusion returns true if the specified pattern is an exclusion -func exclusion(pattern string) bool { - return pattern[0] == '!' +// PatternMatcher allows checking paths agaist a list of patterns +type PatternMatcher struct { + patterns []*Pattern + exclusions bool } -// empty returns true if the specified pattern is empty -func empty(pattern string) bool { - return pattern == "" -} - -// CleanPatterns takes a slice of patterns returns a new -// slice of patterns cleaned with filepath.Clean, stripped -// of any empty patterns and lets the caller know whether the -// slice contains any exception patterns (prefixed with !). -func CleanPatterns(patterns []string) ([]string, [][]string, bool, error) { - // Loop over exclusion patterns and: - // 1. Clean them up. - // 2. Indicate whether we are dealing with any exception rules. - // 3. Error if we see a single exclusion marker on its own (!). - cleanedPatterns := []string{} - patternDirs := [][]string{} - exceptions := false - for _, pattern := range patterns { +// NewPatternMatcher creates a new matcher object for specific patterns that can +// be used later to match against patterns against paths +func NewPatternMatcher(patterns []string) (*PatternMatcher, error) { + pm := &PatternMatcher{ + patterns: make([]*Pattern, 0, len(patterns)), + } + for _, p := range patterns { // Eliminate leading and trailing whitespace. - pattern = strings.TrimSpace(pattern) - if empty(pattern) { + p = strings.TrimSpace(p) + if p == "" { continue } - if exclusion(pattern) { - if len(pattern) == 1 { - return nil, nil, false, errors.New("Illegal exclusion pattern: !") + p = filepath.Clean(p) + newp := &Pattern{} + if p[0] == '!' { + if len(p) == 1 { + return nil, errors.New("illegal exclusion pattern: \"!\"") } - exceptions = true + newp.exclusion = true + p = p[1:] + pm.exclusions = true } - pattern = filepath.Clean(pattern) - cleanedPatterns = append(cleanedPatterns, pattern) - if exclusion(pattern) { - pattern = pattern[1:] + // Do some syntax checking on the pattern. + // filepath's Match() has some really weird rules that are inconsistent + // so instead of trying to dup their logic, just call Match() for its + // error state and if there is an error in the pattern return it. + // If this becomes an issue we can remove this since its really only + // needed in the error (syntax) case - which isn't really critical. + if _, err := filepath.Match(p, "."); err != nil { + return nil, err } - patternDirs = append(patternDirs, strings.Split(pattern, string(os.PathSeparator))) + newp.cleanedPattern = p + newp.dirs = strings.Split(p, string(os.PathSeparator)) + pm.patterns = append(pm.patterns, newp) } - - return cleanedPatterns, patternDirs, exceptions, nil + return pm, nil } -// Matches returns true if file matches any of the patterns -// and isn't excluded by any of the subsequent patterns. -func Matches(file string, patterns []string) (bool, error) { - file = filepath.Clean(file) - - if file == "." { - // Don't let them exclude everything, kind of silly. - return false, nil - } - - patterns, patDirs, _, err := CleanPatterns(patterns) - if err != nil { - return false, err - } - - return OptimizedMatches(file, patterns, patDirs) -} - -// OptimizedMatches is basically the same as fileutils.Matches() but optimized for archive.go. -// It will assume that the inputs have been preprocessed and therefore the function -// doesn't need to do as much error checking and clean-up. This was done to avoid -// repeating these steps on each file being checked during the archive process. -// The more generic fileutils.Matches() can't make these assumptions. -func OptimizedMatches(file string, patterns []string, patDirs [][]string) (bool, error) { +// Matches matches path against all the patterns. Matches is not safe to be +// called concurrently +func (pm *PatternMatcher) Matches(file string) (bool, error) { matched := false file = filepath.FromSlash(file) parentPath := filepath.Dir(file) parentPathDirs := strings.Split(parentPath, string(os.PathSeparator)) - for i, pattern := range patterns { + for _, pattern := range pm.patterns { negative := false - if exclusion(pattern) { + if pattern.exclusion { negative = true - pattern = pattern[1:] } - match, err := regexpMatch(pattern, file) + match, err := pattern.match(file) if err != nil { - return false, fmt.Errorf("Error in pattern (%s): %s", pattern, err) + return false, err } if !match && parentPath != "." { // Check to see if the pattern matches one of our parent dirs. - if len(patDirs[i]) <= len(parentPathDirs) { - match, _ = regexpMatch(strings.Join(patDirs[i], string(os.PathSeparator)), - strings.Join(parentPathDirs[:len(patDirs[i])], string(os.PathSeparator))) + if len(pattern.dirs) <= len(parentPathDirs) { + match, _ = pattern.match(strings.Join(parentPathDirs[:len(pattern.dirs)], string(os.PathSeparator))) } } @@ -120,28 +96,49 @@ func OptimizedMatches(file string, patterns []string, patDirs [][]string) (bool, return matched, nil } -// regexpMatch tries to match the logic of filepath.Match but -// does so using regexp logic. We do this so that we can expand the -// wildcard set to include other things, like "**" to mean any number -// of directories. This means that we should be backwards compatible -// with filepath.Match(). We'll end up supporting more stuff, due to -// the fact that we're using regexp, but that's ok - it does no harm. -// -// As per the comment in golangs filepath.Match, on Windows, escaping -// is disabled. Instead, '\\' is treated as path separator. -func regexpMatch(pattern, path string) (bool, error) { - regStr := "^" +// Exclusions returns true if any of the patterns define exclusions +func (pm *PatternMatcher) Exclusions() bool { + return pm.exclusions +} - // Do some syntax checking on the pattern. - // filepath's Match() has some really weird rules that are inconsistent - // so instead of trying to dup their logic, just call Match() for its - // error state and if there is an error in the pattern return it. - // If this becomes an issue we can remove this since its really only - // needed in the error (syntax) case - which isn't really critical. - if _, err := filepath.Match(pattern, path); err != nil { - return false, err +// Patterns returns array of active patterns +func (pm *PatternMatcher) Patterns() []*Pattern { + return pm.patterns +} + +// Pattern defines a single regexp used used to filter file paths. +type Pattern struct { + cleanedPattern string + dirs []string + regexp *regexp.Regexp + exclusion bool +} + +func (p *Pattern) String() string { + return p.cleanedPattern +} + +// Exclusion returns true if this pattern defines exclusion +func (p *Pattern) Exclusion() bool { + return p.exclusion +} + +func (p *Pattern) match(path string) (bool, error) { + + if p.regexp == nil { + if err := p.compile(); err != nil { + return false, filepath.ErrBadPattern + } } + b := p.regexp.MatchString(path) + + return b, nil +} + +func (p *Pattern) compile() error { + regStr := "^" + pattern := p.cleanedPattern // Go through the pattern and convert it to a regexp. // We use a scanner so we can support utf-8 chars. var scan scanner.Scanner @@ -208,14 +205,30 @@ func regexpMatch(pattern, path string) (bool, error) { regStr += "$" - res, err := regexp.MatchString(regStr, path) - - // Map regexp's error to filepath's so no one knows we're not using filepath + re, err := regexp.Compile(regStr) if err != nil { - err = filepath.ErrBadPattern + return err } - return res, err + p.regexp = re + return nil +} + +// Matches returns true if file matches any of the patterns +// and isn't excluded by any of the subsequent patterns. +func Matches(file string, patterns []string) (bool, error) { + pm, err := NewPatternMatcher(patterns) + if err != nil { + return false, err + } + file = filepath.Clean(file) + + if file == "." { + // Don't let them exclude everything, kind of silly. + return false, nil + } + + return pm.Matches(file) } // CopyFile copies from src to dst until either EOF is reached diff --git a/pkg/fileutils/fileutils_test.go b/pkg/fileutils/fileutils_test.go index 23c48a4a82..0d54392376 100644 --- a/pkg/fileutils/fileutils_test.go +++ b/pkg/fileutils/fileutils_test.go @@ -277,14 +277,6 @@ func TestSingleExclamationError(t *testing.T) { } } -// A string preceded with a ! should return true from Exclusion. -func TestExclusion(t *testing.T) { - exclusion := exclusion("!") - if !exclusion { - t.Errorf("failed to get true for a single !, got %v", exclusion) - } -} - // Matches with no patterns func TestMatchesWithNoPatterns(t *testing.T) { matches, err := Matches("/any/path/there", []string{}) @@ -335,7 +327,7 @@ func TestMatches(t *testing.T) { {"dir/**", "dir/dir2/file", true}, {"dir/**", "dir/dir2/file/", true}, {"**/dir2/*", "dir/dir2/file", true}, - {"**/dir2/*", "dir/dir2/file/", false}, + {"**/dir2/*", "dir/dir2/file/", true}, {"**/dir2/**", "dir/dir2/dir3/file", true}, {"**/dir2/**", "dir/dir2/dir3/file/", true}, {"**file", "file", true}, @@ -384,73 +376,82 @@ func TestMatches(t *testing.T) { } for _, test := range tests { - res, _ := regexpMatch(test.pattern, test.text) + pm, err := NewPatternMatcher([]string{test.pattern}) + if err != nil { + t.Fatalf("invalid pattern %s", test.pattern) + } + res, _ := pm.Matches(test.text) if res != test.pass { t.Fatalf("Failed: %v - res:%v", test, res) } } } -// An empty string should return true from Empty. -func TestEmpty(t *testing.T) { - empty := empty("") - if !empty { - t.Errorf("failed to get true for an empty string, got %v", empty) - } -} - func TestCleanPatterns(t *testing.T) { - cleaned, _, _, _ := CleanPatterns([]string{"docs", "config"}) + patterns := []string{"docs", "config"} + pm, err := NewPatternMatcher(patterns) + if err != nil { + t.Fatalf("invalid pattern %v", patterns) + } + cleaned := pm.Patterns() if len(cleaned) != 2 { t.Errorf("expected 2 element slice, got %v", len(cleaned)) } } func TestCleanPatternsStripEmptyPatterns(t *testing.T) { - cleaned, _, _, _ := CleanPatterns([]string{"docs", "config", ""}) + patterns := []string{"docs", "config", ""} + pm, err := NewPatternMatcher(patterns) + if err != nil { + t.Fatalf("invalid pattern %v", patterns) + } + cleaned := pm.Patterns() if len(cleaned) != 2 { t.Errorf("expected 2 element slice, got %v", len(cleaned)) } } func TestCleanPatternsExceptionFlag(t *testing.T) { - _, _, exceptions, _ := CleanPatterns([]string{"docs", "!docs/README.md"}) - if !exceptions { - t.Errorf("expected exceptions to be true, got %v", exceptions) + patterns := []string{"docs", "!docs/README.md"} + pm, err := NewPatternMatcher(patterns) + if err != nil { + t.Fatalf("invalid pattern %v", patterns) + } + if !pm.Exclusions() { + t.Errorf("expected exceptions to be true, got %v", pm.Exclusions()) } } func TestCleanPatternsLeadingSpaceTrimmed(t *testing.T) { - _, _, exceptions, _ := CleanPatterns([]string{"docs", " !docs/README.md"}) - if !exceptions { - t.Errorf("expected exceptions to be true, got %v", exceptions) + patterns := []string{"docs", " !docs/README.md"} + pm, err := NewPatternMatcher(patterns) + if err != nil { + t.Fatalf("invalid pattern %v", patterns) + } + if !pm.Exclusions() { + t.Errorf("expected exceptions to be true, got %v", pm.Exclusions()) } } func TestCleanPatternsTrailingSpaceTrimmed(t *testing.T) { - _, _, exceptions, _ := CleanPatterns([]string{"docs", "!docs/README.md "}) - if !exceptions { - t.Errorf("expected exceptions to be true, got %v", exceptions) + patterns := []string{"docs", "!docs/README.md "} + pm, err := NewPatternMatcher(patterns) + if err != nil { + t.Fatalf("invalid pattern %v", patterns) + } + if !pm.Exclusions() { + t.Errorf("expected exceptions to be true, got %v", pm.Exclusions()) } } func TestCleanPatternsErrorSingleException(t *testing.T) { - _, _, _, err := CleanPatterns([]string{"!"}) + patterns := []string{"!"} + _, err := NewPatternMatcher(patterns) if err == nil { t.Errorf("expected error on single exclamation point, got %v", err) } } -func TestCleanPatternsFolderSplit(t *testing.T) { - _, dirs, _, _ := CleanPatterns([]string{"docs/config/CONFIG.md"}) - if dirs[0][0] != "docs" { - t.Errorf("expected first element in dirs slice to be docs, got %v", dirs[0][1]) - } - if dirs[0][1] != "config" { - t.Errorf("expected first element in dirs slice to be config, got %v", dirs[0][1]) - } -} - func TestCreateIfNotExistsDir(t *testing.T) { tempFolder, err := ioutil.TempDir("", "docker-fileutils-test") if err != nil { @@ -508,7 +509,7 @@ var matchTests = []matchTest{ {"*c", "abc", true, nil}, {"a*", "a", true, nil}, {"a*", "abc", true, nil}, - {"a*", "ab/c", false, nil}, + {"a*", "ab/c", true, nil}, {"a*/b", "abc/b", true, nil}, {"a*/b", "a/c/b", false, nil}, {"a*b*c*d*e*/f", "axbxcxdxe/f", true, nil}, @@ -579,7 +580,7 @@ func TestMatch(t *testing.T) { pattern = filepath.Clean(pattern) s = filepath.Clean(s) } - ok, err := regexpMatch(pattern, s) + ok, err := Matches(s, []string{pattern}) if ok != tt.match || err != tt.err { t.Fatalf("Match(%#q, %#q) = %v, %q want %v, %q", pattern, s, ok, errp(err), tt.match, errp(tt.err)) }