diff --git a/pkg/fileutils/fileutils.go b/pkg/fileutils/fileutils.go index 77152a6678..d630795359 100644 --- a/pkg/fileutils/fileutils.go +++ b/pkg/fileutils/fileutils.go @@ -285,12 +285,23 @@ func (pm *PatternMatcher) Patterns() []*Pattern { // Pattern defines a single regexp used to filter file paths. type Pattern struct { + matchType matchType cleanedPattern string dirs []string regexp *regexp.Regexp exclusion bool } +type matchType int + +const ( + unknownMatch matchType = iota + exactMatch + prefixMatch + suffixMatch + regexpMatch +) + func (p *Pattern) String() string { return p.cleanedPattern } @@ -301,18 +312,34 @@ func (p *Pattern) Exclusion() bool { } func (p *Pattern) match(path string) (bool, error) { - if p.regexp == nil { - if err := p.compile(); err != nil { + if p.matchType == unknownMatch { + if err := p.compile(string(os.PathSeparator)); err != nil { return false, filepath.ErrBadPattern } } - b := p.regexp.MatchString(path) + switch p.matchType { + case exactMatch: + return path == p.cleanedPattern, nil + case prefixMatch: + // strip trailing ** + return strings.HasPrefix(path, p.cleanedPattern[:len(p.cleanedPattern)-2]), nil + case suffixMatch: + // strip leading ** + suffix := p.cleanedPattern[2:] + if strings.HasSuffix(path, suffix) { + return true, nil + } + // **/foo matches "foo" + return suffix[0] == os.PathSeparator && path == suffix[1:], nil + case regexpMatch: + return p.regexp.MatchString(path), nil + } - return b, nil + return false, nil } -func (p *Pattern) compile() error { +func (p *Pattern) compile(sl string) error { regStr := "^" pattern := p.cleanedPattern // Go through the pattern and convert it to a regexp. @@ -320,13 +347,13 @@ func (p *Pattern) compile() error { var scan scanner.Scanner scan.Init(strings.NewReader(pattern)) - sl := string(os.PathSeparator) escSL := sl if sl == `\` { escSL += `\` } - for scan.Peek() != scanner.EOF { + p.matchType = exactMatch + for i := 0; scan.Peek() != scanner.EOF; i++ { ch := scan.Next() if ch == '*' { @@ -341,20 +368,32 @@ func (p *Pattern) compile() error { if scan.Peek() == scanner.EOF { // is "**EOF" - to align with .gitignore just accept all - regStr += ".*" + if p.matchType == exactMatch { + p.matchType = prefixMatch + } else { + regStr += ".*" + p.matchType = regexpMatch + } } else { // is "**" // Note that this allows for any # of /'s (even 0) because // the .* will eat everything, even /'s regStr += "(.*" + escSL + ")?" + p.matchType = regexpMatch + } + + if i == 0 { + p.matchType = suffixMatch } } else { // is "*" so map it to anything but "/" regStr += "[^" + escSL + "]*" + p.matchType = regexpMatch } } else if ch == '?' { // "?" is any char except "/" regStr += "[^" + escSL + "]" + p.matchType = regexpMatch } else if shouldEscape(ch) { // Escape some regexp special chars that have no meaning // in golang's filepath.Match @@ -371,14 +410,22 @@ func (p *Pattern) compile() error { } if scan.Peek() != scanner.EOF { regStr += `\` + string(scan.Next()) + p.matchType = regexpMatch } else { regStr += `\` } + } else if ch == '[' || ch == ']' { + regStr += string(ch) + p.matchType = regexpMatch } else { regStr += string(ch) } } + if p.matchType != regexpMatch { + return nil + } + regStr += "$" re, err := regexp.Compile(regStr) @@ -387,6 +434,7 @@ func (p *Pattern) compile() error { } p.regexp = re + p.matchType = regexpMatch return nil } diff --git a/pkg/fileutils/fileutils_test.go b/pkg/fileutils/fileutils_test.go index b24d82ebd2..3eb7b520e9 100644 --- a/pkg/fileutils/fileutils_test.go +++ b/pkg/fileutils/fileutils_test.go @@ -382,10 +382,14 @@ func TestMatches(t *testing.T) { {"a(b)c/def", "a(b)c/def", true}, {"a(b)c/def", "a(b)c/xyz", false}, {"a.|)$(}+{bc", "a.|)$(}+{bc", true}, + {"dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true}, + {"dist/*.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true}, } multiPatternTests := []multiPatternTestCase{ {[]string{"**", "!util/docker/web"}, "util/docker/web/foo", false}, {[]string{"**", "!util/docker/web", "util/docker/web/foo"}, "util/docker/web/foo", true}, + {[]string{"**", "!dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false}, + {[]string{"**", "!dist/*.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false}, } if runtime.GOOS != "windows" { @@ -671,7 +675,7 @@ func errp(e error) string { return e.Error() } -// TestMatch test's our version of filepath.Match, called regexpMatch. +// TestMatch tests our version of filepath.Match, called Matches. func TestMatch(t *testing.T) { for _, tt := range matchTests { pattern := tt.pattern @@ -690,3 +694,78 @@ func TestMatch(t *testing.T) { } } } + +type compileTestCase struct { + pattern string + matchType matchType + compiledRegexp string + windowsCompiledRegexp string +} + +var compileTests = []compileTestCase{ + {"*", regexpMatch, `^[^/]*$`, `^[^\\]*$`}, + {"file*", regexpMatch, `^file[^/]*$`, `^file[^\\]*$`}, + {"*file", regexpMatch, `^[^/]*file$`, `^[^\\]*file$`}, + {"a*/b", regexpMatch, `^a[^/]*/b$`, `^a[^\\]*\\b$`}, + {"**", suffixMatch, "", ""}, + {"**/**", regexpMatch, `^(.*/)?.*$`, `^(.*\\)?.*$`}, + {"dir/**", prefixMatch, "", ""}, + {"**/dir", suffixMatch, "", ""}, + {"**/dir2/*", regexpMatch, `^(.*/)?dir2/[^/]*$`, `^(.*\\)?dir2\\[^\\]*$`}, + {"**/dir2/**", regexpMatch, `^(.*/)?dir2/.*$`, `^(.*\\)?dir2\\.*$`}, + {"**file", suffixMatch, "", ""}, + {"**/file*txt", regexpMatch, `^(.*/)?file[^/]*txt$`, `^(.*\\)?file[^\\]*txt$`}, + {"**/**/*.txt", regexpMatch, `^(.*/)?(.*/)?[^/]*\.txt$`, `^(.*\\)?(.*\\)?[^\\]*\.txt$`}, + {"a[b-d]e", regexpMatch, `^a[b-d]e$`, `^a[b-d]e$`}, + {".*", regexpMatch, `^\.[^/]*$`, `^\.[^\\]*$`}, + {"abc.def", exactMatch, "", ""}, + {"abc?def", regexpMatch, `^abc[^/]def$`, `^abc[^\\]def$`}, + {"**/foo/bar", suffixMatch, "", ""}, + {"a(b)c/def", exactMatch, "", ""}, + {"a.|)$(}+{bc", exactMatch, "", ""}, + {"dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", exactMatch, "", ""}, +} + +// TestCompile confirms that "compile" assigns the correct match type to a +// variety of test case patterns. If the match type is regexp, it also confirms +// that the compiled regexp matches the expected regexp. +func TestCompile(t *testing.T) { + t.Run("slash", testCompile("/")) + t.Run("backslash", testCompile(`\`)) +} + +func testCompile(sl string) func(*testing.T) { + return func(t *testing.T) { + for _, tt := range compileTests { + // Avoid NewPatternMatcher, which has platform-specific behavior + pm := &PatternMatcher{ + patterns: make([]*Pattern, 1), + } + pattern := path.Clean(tt.pattern) + if sl != "/" { + pattern = strings.ReplaceAll(pattern, "/", sl) + } + newp := &Pattern{} + newp.cleanedPattern = pattern + newp.dirs = strings.Split(pattern, sl) + pm.patterns[0] = newp + + if err := pm.patterns[0].compile(sl); err != nil { + t.Fatalf("Failed to compile pattern %q: %v", pattern, err) + } + if pm.patterns[0].matchType != tt.matchType { + t.Errorf("pattern %q: matchType = %v, want %v", pattern, pm.patterns[0].matchType, tt.matchType) + continue + } + if tt.matchType == regexpMatch { + if sl == `\` { + if pm.patterns[0].regexp.String() != tt.windowsCompiledRegexp { + t.Errorf("pattern %q: regexp = %s, want %s", pattern, pm.patterns[0].regexp, tt.windowsCompiledRegexp) + } + } else if pm.patterns[0].regexp.String() != tt.compiledRegexp { + t.Errorf("pattern %q: regexp = %s, want %s", pattern, pm.patterns[0].regexp, tt.compiledRegexp) + } + } + } + } +}