Implement filtering by filename on code search

This commit is contained in:
Mario de la Ossa 2018-06-06 18:14:10 -06:00
parent 60b102be3d
commit 7357209f91
No known key found for this signature in database
GPG key ID: 20CA8F4C6A20761B
8 changed files with 186 additions and 6 deletions

View file

@ -0,0 +1,5 @@
---
title: Add filename filtering to code search
merge_request: 19509
author:
type: added

View file

@ -776,6 +776,15 @@ Example response:
### Scope: blobs ### Scope: blobs
Filters are available for this scope:
- filename
- path
- extension
To use a filter, simply include it in your query like so: `a query filename:some_name*`.
You may use wildcards (`*`) for glob matching.
```bash ```bash
curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/projects/6/search?scope=blobs&search=installation curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/projects/6/search?scope=blobs&search=installation
``` ```

View file

@ -14,14 +14,21 @@ module Gitlab
end end
def find(query) def find(query)
by_content = find_by_content(query) query = Gitlab::Search::Query.new(query) do
filter :filename, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}$/i }
filter :path, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}/i }
filter :extension, matcher: ->(filter, blob) { blob.filename =~ /\.#{filter[:regex_value]}$/i }
end
by_content = find_by_content(query.term)
already_found = Set.new(by_content.map(&:filename)) already_found = Set.new(by_content.map(&:filename))
by_filename = find_by_filename(query, except: already_found) by_filename = find_by_filename(query.term, except: already_found)
(by_content + by_filename) files = (by_content + by_filename)
.sort_by(&:filename) .sort_by(&:filename)
.map { |blob| [blob.filename, blob] }
query.filter_results(files).map { |blob| [blob.filename, blob] }
end end
private private

View file

@ -0,0 +1,23 @@
module Gitlab
  module Search
    # Value object pairing a search term with the filters that were parsed
    # alongside it, and able to apply those filters to a result list.
    class ParsedQuery
      attr_reader :term, :filters

      # @param term [String] the search term with the filter fragments removed
      # @param filters [Array<Hash>] filter definitions; a filter takes part in
      #   #filter_results only when it carries a callable under +:matcher+
      def initialize(term, filters)
        @term = term
        @filters = filters
      end

      # Keeps only the results accepted by every filter that has a matcher.
      #
      # Each matcher is called as +matcher.call(filter, result)+ and its
      # truthiness decides whether the result survives. Filters without a
      # +:matcher+ are ignored. When no filter has a matcher, every result is
      # kept.
      #
      # @param results [Enumerable] candidate results
      # @return [Array] the results accepted by all matchers
      def filter_results(results)
        active_filters = @filters.reject { |filter| filter[:matcher].nil? }

        # No early return is needed here: with no active filters `all?` is
        # vacuously true, so `select` keeps everything. (A previous
        # `return unless filters` guard could never fire because an Array is
        # always truthy, and would have returned nil if it had.)
        results.select do |result|
          active_filters.all? { |filter| filter[:matcher].call(filter, result) }
        end
      end
    end
  end
end

View file

@ -0,0 +1,55 @@
module Gitlab
  module Search
    # Parses a raw search string into a plain term plus a list of
    # `name:value` filters, then delegates to the resulting ParsedQuery.
    #
    # Filters are declared in the block given to .new, e.g.
    #
    #   Query.new('foo filename:bar*') do
    #     filter :filename, matcher: ->(filter, blob) { ... }
    #   end
    class Query < SimpleDelegator
      # @param query [String] the raw search string
      # @param filter_opts [Hash] overrides for the filter options; the only
      #   option read here is +:default_parser+ (defaults to downcasing)
      # @yield evaluated in the context of the new instance so that the
      #   private +filter+ / +filter_options+ helpers can be called bare
      def initialize(query, filter_opts = {}, &block)
        @raw_query = query.dup
        @filters = []
        @filter_options = { default_parser: :downcase.to_proc }.merge(filter_opts)

        instance_eval(&block) if block

        @query = Gitlab::Search::ParsedQuery.new(*extract_filters)
        # set the ParsedQuery as our default delegator thanks to SimpleDelegator
        super(@query)
      end

      private

      # Declares a filter. +attributes+ are merged over the defaults, so a
      # +:parser+ given here replaces the default parser for this filter.
      def filter(name, **attributes)
        @filters << { parser: @filter_options[:default_parser], name: name }.merge(attributes)
      end

      # Updates the filter options; only affects filters declared afterwards.
      def filter_options(**options)
        @filter_options.merge!(options)
      end

      # Splits the raw query into the remaining search term and the declared
      # filters that actually appear in it with a non-empty value.
      #
      # @return [Array(String, Array<Hash>)] the term and the matched filters,
      #   each filter extended with +:value+ and +:regex_value+
      def extract_filters
        tokens = @raw_query.split
        fragments = []

        filters = @filters.each_with_object([]) do |filter, parsed_filters|
          prefix = "#{filter[:name]}:"
          match = tokens.find { |part| part.start_with?(prefix) }
          next unless match

          # Take everything after the first "name:" so that values which
          # themselves contain a colon are preserved intact.
          input = match[prefix.length..-1]
          next if input.empty?

          filter[:value] = parse_filter(filter, input)
          # Escape the value for use inside a regex, then turn the escaped
          # `*` wildcard back into a lazy "match anything".
          filter[:regex_value] = Regexp.escape(filter[:value]).gsub('\*', '.*?')
          fragments << match
          parsed_filters << filter
        end

        [(tokens - fragments).join(' '), filters]
      end

      # Runs the filter's parser over the raw text found in the query.
      def parse_filter(filter, input)
        filter[:parser].call(input)
      end
    end
  end
end

View file

@ -3,11 +3,29 @@ require 'spec_helper'
describe Gitlab::FileFinder do describe Gitlab::FileFinder do
describe '#find' do describe '#find' do
let(:project) { create(:project, :public, :repository) } let(:project) { create(:project, :public, :repository) }
subject { described_class.new(project, project.default_branch) }
it_behaves_like 'file finder' do it_behaves_like 'file finder' do
subject { described_class.new(project, project.default_branch) }
let(:expected_file_by_name) { 'files/images/wm.svg' } let(:expected_file_by_name) { 'files/images/wm.svg' }
let(:expected_file_by_content) { 'CHANGELOG' } let(:expected_file_by_content) { 'CHANGELOG' }
end end
it 'filters by name' do
results = subject.find('files filename:wm.svg')
expect(results.count).to eq(1)
end
it 'filters by path' do
results = subject.find('white path:images')
expect(results.count).to eq(1)
end
it 'filters by extension' do
results = subject.find('files extension:svg')
expect(results.count).to eq(1)
end
end end
end end

View file

@ -0,0 +1,39 @@
require 'spec_helper'
describe Gitlab::Search::Query do
  let(:query) { 'base filter:wow anotherfilter:noway name:maybe other:mmm leftover' }

  # Declared with `subject` rather than `let(:subject)` so the example group's
  # subject is defined through RSpec's own subject API.
  subject do
    described_class.new(query) do
      filter :filter
      filter :name, parser: :upcase.to_proc
      filter :other
    end
  end

  it { expect(described_class).to be < SimpleDelegator }

  # Only the filters declared in the block are extracted; anything else that
  # merely looks like `name:value` stays part of the search term.
  it 'leaves undefined filters in the main query' do
    expect(subject.term).to eq('base anotherfilter:noway leftover')
  end

  it 'parses filters' do
    expect(subject.filters.count).to eq(3)
    expect(subject.filters.map { |f| f[:value] }).to match_array(%w[wow MAYBE mmm])
  end

  context 'with an empty filter' do
    let(:query) { 'some bar name: baz' }

    it 'ignores empty filters' do
      expect(subject.term).to eq('some bar name: baz')
    end
  end

  context 'with a pipe' do
    let(:query) { 'base | nofilter' }

    it 'does not escape the pipe' do
      expect(subject.term).to eq(query)
    end
  end
end

View file

@ -312,6 +312,30 @@ describe API::Search do
end end
it_behaves_like 'response is correct', schema: 'public_api/v4/blobs', size: 2 it_behaves_like 'response is correct', schema: 'public_api/v4/blobs', size: 2
context 'filters' do
it 'by filename' do
get api("/projects/#{repo_project.id}/search", user), scope: 'blobs', search: 'mon filename:PROCESS.md'
expect(response).to have_gitlab_http_status(200)
expect(json_response.size).to eq(2)
expect(json_response.first['filename']).to eq('PROCESS.md')
end
it 'by path' do
get api("/projects/#{repo_project.id}/search", user), scope: 'blobs', search: 'mon path:markdown'
expect(response).to have_gitlab_http_status(200)
expect(json_response.size).to eq(8)
end
it 'by extension' do
get api("/projects/#{repo_project.id}/search", user), scope: 'blobs', search: 'mon extension:md'
expect(response).to have_gitlab_http_status(200)
expect(json_response.size).to eq(11)
end
end
end end
end end
end end