From 195558de5aba370bdb06109629653b36ca7313d3 Mon Sep 17 00:00:00 2001
From: Milas Bowman
Date: Fri, 22 Oct 2021 11:26:15 -0400
Subject: [PATCH] pkg/fileutils: escape additional regex meta characters

There were a couple characters being explicitly escaped, but it
wasn't comprehensive.

This is now the set difference between the Golang regex meta
characters and the `filepath` match meta characters with the
exception of `\`, which already has special logic due to being
the path separator on Windows.

Signed-off-by: Milas Bowman
---
 pkg/fileutils/fileutils.go      | 28 +++++++++++++++++++++++++++-
 pkg/fileutils/fileutils_test.go |  5 +++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/pkg/fileutils/fileutils.go b/pkg/fileutils/fileutils.go
index da4ffd13e2..dac522ccb1 100644
--- a/pkg/fileutils/fileutils.go
+++ b/pkg/fileutils/fileutils.go
@@ -9,8 +9,34 @@ import (
 	"regexp"
 	"strings"
 	"text/scanner"
+	"unicode/utf8"
 )
 
+// escapeBytes is a bitmap used to check whether a character should be escaped when creating the regex.
+//
+// It holds one bit per ASCII byte: byte b lives at bit b/16 of escapeBytes[b%16]. The array must have 16 entries
+// so that b/16 stays in the range 0-7 for every b < utf8.RuneSelf; with a smaller array the shift would overflow
+// the 8-bit element and characters such as '|', '{' and '}' would silently never be marked.
+var escapeBytes [16]byte
+
+// shouldEscape reports whether a rune should be escaped as part of the regex.
+//
+// This only includes characters that require escaping in regex but are also NOT valid filepath pattern characters.
+// Additionally, '\' is deliberately left out because there is specific logic to properly handle it, as it's a path
+// separator on Windows.
+//
+// Adapted from regexp::QuoteMeta in go stdlib.
+// See https://cs.opensource.google/go/go/+/refs/tags/go1.17.2:src/regexp/regexp.go;l=703-715;drc=refs%2Ftags%2Fgo1.17.2
+func shouldEscape(b rune) bool {
+	return b < utf8.RuneSelf && escapeBytes[b%16]&(1<<(b/16)) != 0
+}
+
+func init() {
+	for _, b := range []byte(`.+()|{}$`) {
+		escapeBytes[b%16] |= 1 << (b / 16)
+	}
+}
+
 // PatternMatcher allows checking paths against a list of patterns
 type PatternMatcher struct {
 	patterns []*Pattern
@@ -256,7 +282,7 @@ func (p *Pattern) compile() error {
 		} else if ch == '?' {
 			// "?" is any char except "/"
 			regStr += "[^" + escSL + "]"
-		} else if ch == '.' || ch == '$' {
+		} else if shouldEscape(ch) {
 			// Escape some regexp special chars that have no meaning
 			// in golang's filepath.Match
 			regStr += `\` + string(ch)
diff --git a/pkg/fileutils/fileutils_test.go b/pkg/fileutils/fileutils_test.go
index 12ee86d5a8..2d66b431d8 100644
--- a/pkg/fileutils/fileutils_test.go
+++ b/pkg/fileutils/fileutils_test.go
@@ -373,6 +373,11 @@ func TestMatches(t *testing.T) {
 		{"abc/**", "abc/def/ghi", true},
 		{"**/.foo", ".foo", true},
 		{"**/.foo", "bar.foo", false},
+		{"a(b)c/def", "a(b)c/def", true},
+		{"a(b)c/def", "a(b)c/xyz", false},
+		{"a.|)$(}+{bc", "a.|)$(}+{bc", true},
+		{"a|b", "a|b", true},
+		{"a|b", "a", false},
 	}
 
 	if runtime.GOOS != "windows" {