Add more optimal MatchesUsingParentResult method, use it in pkg/archive

Signed-off-by: Aaron Lehmann <alehmann@netflix.com>
This commit is contained in:
Aaron Lehmann 2021-08-12 13:57:50 -07:00
parent 90f8d1b675
commit 9bae4f2f24
3 changed files with 99 additions and 16 deletions

View File

@ -817,6 +817,11 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
for _, include := range options.IncludeFiles {
rebaseName := options.RebaseNames[include]
var (
parentMatched []bool
parentDirs []string
)
walkRoot := getWalkRoot(srcPath, include)
filepath.Walk(walkRoot, func(filePath string, f os.FileInfo, err error) error {
if err != nil {
@ -843,11 +848,29 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
// is asking for that file no matter what - which is true
// for some files, like .dockerignore and Dockerfile (sometimes)
if include != relFilePath {
skip, err = pm.Matches(relFilePath)
for len(parentDirs) != 0 {
lastParentDir := parentDirs[len(parentDirs)-1]
if strings.HasPrefix(relFilePath, lastParentDir+string(os.PathSeparator)) {
break
}
parentDirs = parentDirs[:len(parentDirs)-1]
parentMatched = parentMatched[:len(parentMatched)-1]
}
if len(parentMatched) != 0 {
skip, err = pm.MatchesUsingParentResult(relFilePath, parentMatched[len(parentMatched)-1])
} else {
skip, err = pm.Matches(relFilePath)
}
if err != nil {
logrus.Errorf("Error matching %s: %v", relFilePath, err)
return err
}
if f.IsDir() {
parentDirs = append(parentDirs, relFilePath)
parentMatched = append(parentMatched, skip)
}
}
if skip {

View File

@ -55,8 +55,12 @@ func NewPatternMatcher(patterns []string) (*PatternMatcher, error) {
return pm, nil
}
// Matches matches path against all the patterns. Matches is not safe to be
// called concurrently
// Matches returns true if "file" matches any of the patterns
// and isn't excluded by any of the subsequent patterns.
//
// The "file" argument should be a slash-delimited path.
//
// Matches is not safe to call concurrently.
func (pm *PatternMatcher) Matches(file string) (bool, error) {
matched := false
file = filepath.FromSlash(file)
@ -64,10 +68,11 @@ func (pm *PatternMatcher) Matches(file string) (bool, error) {
parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
for _, pattern := range pm.patterns {
negative := false
if pattern.exclusion {
negative = true
// Skip evaluation if this is an inclusion and the filename
// already matched the pattern, or it's an exclusion and it has
// not matched the pattern yet.
if pattern.exclusion != matched {
continue
}
match, err := pattern.match(file)
@ -86,13 +91,45 @@ func (pm *PatternMatcher) Matches(file string) (bool, error) {
}
if match {
matched = !negative
matched = !pattern.exclusion
}
}
return matched, nil
}
// MatchesUsingParentResult returns true if "file" matches any of the patterns
// and isn't excluded by any of the subsequent patterns. The functionality is
// the same as Matches, but as an optimization, the caller keeps track of
// whether the parent directory matched.
//
// The "file" argument should be a slash-delimited path.
//
// MatchesUsingParentResult is not safe to call concurrently.
func (pm *PatternMatcher) MatchesUsingParentResult(file string, parentMatched bool) (bool, error) {
matched := parentMatched
file = filepath.FromSlash(file)
for _, pattern := range pm.patterns {
// Skip evaluation if this is an inclusion and the filename
// already matched the pattern, or it's an exclusion and it has
// not matched the pattern yet.
if pattern.exclusion != matched {
continue
}
match, err := pattern.match(file)
if err != nil {
return false, err
}
if match {
matched = !pattern.exclusion
}
}
return matched, nil
}
// Exclusions returns true if any of the patterns define exclusions
func (pm *PatternMatcher) Exclusions() bool {
return pm.exclusions
@ -121,7 +158,6 @@ func (p *Pattern) Exclusion() bool {
}
func (p *Pattern) match(path string) (bool, error) {
if p.regexp == nil {
if err := p.compile(); err != nil {
return false, filepath.ErrBadPattern

View File

@ -382,13 +382,37 @@ func TestMatches(t *testing.T) {
}...)
}
for _, test := range tests {
desc := fmt.Sprintf("pattern=%q text=%q", test.pattern, test.text)
pm, err := NewPatternMatcher([]string{test.pattern})
assert.NilError(t, err, desc)
res, _ := pm.Matches(test.text)
assert.Check(t, is.Equal(test.pass, res), desc)
}
t.Run("Matches", func(t *testing.T) {
for _, test := range tests {
desc := fmt.Sprintf("pattern=%q text=%q", test.pattern, test.text)
pm, err := NewPatternMatcher([]string{test.pattern})
assert.NilError(t, err, desc)
res, _ := pm.Matches(test.text)
assert.Check(t, is.Equal(test.pass, res), desc)
}
})
t.Run("MatchesUsingParentResult", func(t *testing.T) {
for _, test := range tests {
desc := fmt.Sprintf("pattern=%q text=%q", test.pattern, test.text)
pm, err := NewPatternMatcher([]string{test.pattern})
assert.NilError(t, err, desc)
parentPath := path.Dir(test.text)
parentPathDirs := strings.Split(parentPath, "/")
parentMatched := false
if parentPath != "." {
for i := range parentPathDirs {
parentMatched, _ = pm.MatchesUsingParentResult(strings.Join(parentPathDirs[:i+1], "/"), parentMatched)
}
}
res, _ := pm.MatchesUsingParentResult(test.text, parentMatched)
assert.Check(t, is.Equal(test.pass, res), desc)
}
})
}
func TestCleanPatterns(t *testing.T) {