Merge branch 'dm-search-pattern' into 'master'

Use fuzzy search with minimum length of 3 characters where appropriate

Closes #40512

See merge request gitlab-org/gitlab-ce!15592
This commit is contained in:
Sean McGivern 2017-11-29 09:10:43 +00:00
commit a4f8dddc21
17 changed files with 84 additions and 85 deletions

View file

@ -104,8 +104,7 @@ class NotesFinder
query = @params[:search]
return notes unless query
pattern = "%#{query}%"
notes.where(Note.arel_table[:note].matches(pattern))
notes.search(query)
end
# Notes changed since last fetch

View file

@ -1,6 +1,7 @@
module Ci
class Runner < ActiveRecord::Base
extend Gitlab::Ci::Model
include Gitlab::SQL::Pattern
RUNNER_QUEUE_EXPIRY_TIME = 60.minutes
ONLINE_CONTACT_TIMEOUT = 1.hour
@ -59,10 +60,7 @@ module Ci
#
# Returns an ActiveRecord::Relation.
def self.search(query)
  # Match the query against the runner token or description. The pattern
  # building (including escaping of LIKE wildcards) is left to fuzzy_search
  # instead of interpolating the raw query into "%...%" here.
  fuzzy_search(query, [:token, :description])
end
def self.contact_time_deadline

View file

@ -122,9 +122,7 @@ module Issuable
#
# Returns an ActiveRecord::Relation.
def search(query)
  # Title-only search; fuzzy_search builds the per-word conditions for the
  # listed columns and applies them with `where`.
  fuzzy_search(query, [:title])
end
# Searches for records with a matching title or description.
@ -135,10 +133,7 @@ module Issuable
#
# Returns an ActiveRecord::Relation.
def full_search(query)
  # A record matches when either its title or its description matches;
  # fuzzy_search ORs the per-column conditions together.
  fuzzy_search(query, [:title, :description])
end
def sort(method, excluded_labels: [])

View file

@ -1,5 +1,6 @@
class Email < ActiveRecord::Base
include Sortable
include Gitlab::SQL::Pattern
belongs_to :user

View file

@ -50,20 +50,6 @@ class Group < Namespace
Gitlab::Database.postgresql?
end
# Searches for groups matching the given query.
#
# Matches against the group name or path. The conditions are built by
# fuzzy_search, the same helper Namespace.search uses, so LIKE wildcards
# in the query are escaped rather than interpolated into the pattern.
#
# This method uses ILIKE on PostgreSQL and LIKE on MySQL.
#
# query - The search query as a String
#
# Returns an ActiveRecord::Relation.
def search(query)
  fuzzy_search(query, [:name, :path])
end
def sort(method)
if method == 'storage_size_desc'
# storage_size is a virtual column so we need to

View file

@ -13,6 +13,7 @@ class Milestone < ActiveRecord::Base
include Referable
include StripAttribute
include Milestoneish
include Gitlab::SQL::Pattern
cache_markdown_field :title, pipeline: :single_line
cache_markdown_field :description
@ -73,10 +74,7 @@ class Milestone < ActiveRecord::Base
#
# Returns an ActiveRecord::Relation.
def search(query)
  # A milestone matches when its title or description matches the query;
  # pattern construction is delegated to fuzzy_search.
  fuzzy_search(query, [:title, :description])
end
def filter_by_state(milestones, state)

View file

@ -9,6 +9,7 @@ class Namespace < ActiveRecord::Base
include Routable
include AfterCommitQueue
include Storage::LegacyNamespace
include Gitlab::SQL::Pattern
# Prevent users from creating unreasonably deep level of nesting.
# The number 20 was taken based on maximum nesting level of
@ -86,10 +87,7 @@ class Namespace < ActiveRecord::Base
#
# Returns an ActiveRecord::Relation
def search(query)
  # A namespace matches when its name or path matches the query;
  # pattern construction is delegated to fuzzy_search.
  fuzzy_search(query, [:name, :path])
end
def clean_path(path)

View file

@ -14,6 +14,7 @@ class Note < ActiveRecord::Base
include ResolvableNote
include IgnorableColumn
include Editable
include Gitlab::SQL::Pattern
module SpecialRole
FIRST_TIME_CONTRIBUTOR = :first_time_contributor
@ -167,6 +168,10 @@ class Note < ActiveRecord::Base
# Tells whether the given note carries the given special role.
#
# role - The role to look for (e.g. SpecialRole::FIRST_TIME_CONTRIBUTOR).
# note - An object responding to `special_role`.
#
# Returns the result of comparing the note's special role with `role`.
def has_special_role?(role, note)
  assigned_role = note.special_role
  assigned_role == role
end
# Searches notes whose body (the `note` column) matches the query.
#
# query - The search query as a String.
#
# Returns whatever fuzzy_search returns for the `note` column.
def search(query)
  searchable_columns = [:note]
  fuzzy_search(query, searchable_columns)
end
end
def cross_reference?

View file

@ -426,17 +426,11 @@ class Project < ActiveRecord::Base
#
# query - The search query as a String.
def search(query)
  # A project matches when its path, name or description matches the
  # query; fuzzy_search builds and ORs the per-column conditions.
  fuzzy_search(query, [:path, :name, :description])
end
def search_by_title(query)
  # Restrict to the non_archived scope first, then match on the project
  # name only.
  non_archived.fuzzy_search(query, [:name])
end
def visibility_levels

View file

@ -9,6 +9,7 @@ class Snippet < ActiveRecord::Base
include Mentionable
include Spammable
include Editable
include Gitlab::SQL::Pattern
extend Gitlab::CurrentSettings
@ -135,10 +136,7 @@ class Snippet < ActiveRecord::Base
#
# Returns an ActiveRecord::Relation.
def search(query)
  # A snippet matches when its title or file name matches the query;
  # pattern construction is delegated to fuzzy_search.
  fuzzy_search(query, [:title, :file_name])
end
# Searches for snippets with matching content.
@ -149,10 +147,7 @@ class Snippet < ActiveRecord::Base
#
# Returns an ActiveRecord::Relation.
def search_code(query)
  # Content-only search: matches the query against the snippet body.
  fuzzy_search(query, [:content])
end
end
end

View file

@ -313,9 +313,6 @@ class User < ActiveRecord::Base
#
# Returns an ActiveRecord::Relation.
def search(query)
table = arel_table
pattern = User.to_pattern(query)
order = <<~SQL
CASE
WHEN users.name = %{query} THEN 0
@ -325,11 +322,8 @@ class User < ActiveRecord::Base
END
SQL
where(
table[:name].matches(pattern)
.or(table[:email].matches(pattern))
.or(table[:username].matches(pattern))
).reorder(order % { query: ActiveRecord::Base.connection.quote(query) }, :name)
fuzzy_search(query, [:name, :email, :username])
.reorder(order % { query: ActiveRecord::Base.connection.quote(query) }, :name)
end
# searches user by given pattern
@ -337,16 +331,16 @@ class User < ActiveRecord::Base
# Matches users by name, primary email or username, and additionally any
# user owning a row in the emails table whose email matches the query.
#
# This method uses ILIKE on PostgreSQL and LIKE on MySQL.
#
# query - The search query as a String.
#
# NOTE(review): fuzzy_arel_match returns nil for a blank query, so this
# method presumably expects a non-blank query - confirm against callers.
def search_with_secondary_emails(query)
  email_table = Email.arel_table

  # Subquery selecting the user ids of matching secondary emails.
  matched_by_emails_user_ids = email_table
    .project(email_table[:user_id])
    .where(Email.fuzzy_arel_match(:email, query))

  where(
    fuzzy_arel_match(:name, query)
      .or(fuzzy_arel_match(:email, query))
      .or(fuzzy_arel_match(:username, query))
      .or(arel_table[:id].in(matched_by_emails_user_ids))
  )
end

View file

@ -0,0 +1,5 @@
---
title: Use fuzzy search with minimum length of 3 characters where appropriate
merge_request:
author:
type: performance

View file

@ -4,9 +4,15 @@ module Gitlab
extend ActiveSupport::Concern
MIN_CHARS_FOR_PARTIAL_MATCHING = 3
# Matches a double-quoted phrase that is delimited by a space or by the
# start/end of the whole string. \A and \z are used instead of ^ and $
# so that a line break inside the query is not treated as a boundary.
REGEX_QUOTED_WORD = /(?<=\A| )"[^"]+"(?= |\z)/
class_methods do
# Filters the receiver by a fuzzy match of `query` on any of `columns`.
#
# query   - The search query as a String.
# columns - An Array of column names to match against.
#
# One condition is built per column with fuzzy_arel_match; columns that
# yield no condition (nil) are dropped and the rest are joined with OR.
# The combined condition (nil when none remain) is passed to `where`.
def fuzzy_search(query, columns)
  conditions = columns.map { |column| fuzzy_arel_match(column, query) }.compact
  combined = conditions.reduce { |joined, condition| joined.or(condition) }

  where(combined)
end
def to_pattern(query)
if partial_matching?(query)
"%#{sanitize_sql_like(query)}%"
@ -19,12 +25,19 @@ module Gitlab
query.length >= MIN_CHARS_FOR_PARTIAL_MATCHING
end
# Builds an Arel condition matching `column` against `query`.
#
# column - The column name to match on.
# query  - The search query as a String.
#
# The query is squished first. When select_fuzzy_words yields words, each
# word becomes a `matches` condition built with to_pattern and the
# conditions are joined with AND. Otherwise the whole query is matched
# as-is (after LIKE escaping).
#
# Returns the condition, or nil when the query is blank.
def fuzzy_arel_match(column, query)
  query = query.squish
  return nil unless query.present?

  words = select_fuzzy_words(query)

  if words.any?
    words.map { |word| arel_table[column].matches(to_pattern(word)) }.reduce(:and)
  else
    # No words of at least 3 chars, but we can search for an exact
    # case-insensitive match with the query as a whole
    arel_table[column].matches(sanitize_sql_like(query))
  end
end

# Previous name of fuzzy_arel_match, kept as a thin delegate so callers
# still using the old name keep resolving.
def to_fuzzy_arel(column, query)
  fuzzy_arel_match(column, query)
end
def select_fuzzy_words(query)
@ -32,7 +45,7 @@ module Gitlab
query = quoted_words.reduce(query) { |q, quoted_word| q.sub(quoted_word, '') }
words = query.split(/\s+/)
words = query.split
quoted_words.map! { |quoted_word| quoted_word[1..-2] }

View file

@ -137,22 +137,22 @@ describe Gitlab::SQL::Pattern do
end
end
describe '.to_fuzzy_arel' do
subject(:to_fuzzy_arel) { Issue.to_fuzzy_arel(:title, query) }
describe '.fuzzy_arel_match' do
subject(:fuzzy_arel_match) { Issue.fuzzy_arel_match(:title, query) }
context 'with a word equal to 3 chars' do
let(:query) { 'foo' }
it 'returns a single ILIKE condition' do
expect(to_fuzzy_arel.to_sql).to match(/title.*I?LIKE '\%foo\%'/)
expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE '\%foo\%'/)
end
end
context 'with a word shorter than 3 chars' do
let(:query) { 'fo' }
it 'returns nil' do
expect(to_fuzzy_arel).to be_nil
it 'returns a single equality condition' do
expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE 'fo'/)
end
end
@ -160,7 +160,23 @@ describe Gitlab::SQL::Pattern do
let(:query) { 'foo baz' }
it 'returns a joining LIKE condition using a AND' do
expect(to_fuzzy_arel.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%'/)
expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%'/)
end
end
context 'with two words both shorter than 3 chars' do
let(:query) { 'fo ba' }
it 'returns a single ILIKE condition' do
expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE 'fo ba'/)
end
end
context 'with two words, one shorter 3 chars' do
let(:query) { 'foo ba' }
it 'returns a single ILIKE condition using the longer word' do
expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%'/)
end
end
@ -168,7 +184,7 @@ describe Gitlab::SQL::Pattern do
let(:query) { 'foo "really bar" baz' }
it 'returns a joining LIKE condition using a AND' do
expect(to_fuzzy_arel.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%' AND .*title.*I?LIKE '\%really bar\%'/)
expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%' AND .*title.*I?LIKE '\%really bar\%'/)
end
end
end

View file

@ -473,7 +473,7 @@ describe Ci::Runner do
end
describe '.search' do
let(:runner) { create(:ci_runner, token: '123abc') }
let(:runner) { create(:ci_runner, token: '123abc', description: 'test runner') }
it 'returns runners with a matching token' do
expect(described_class.search(runner.token)).to eq([runner])

View file

@ -67,6 +67,7 @@ describe Issuable do
describe ".search" do
let!(:searchable_issue) { create(:issue, title: "Searchable awesome issue") }
let!(:searchable_issue2) { create(:issue, title: 'Aw') }
it 'returns issues with a matching title' do
expect(issuable_class.search(searchable_issue.title))
@ -86,8 +87,8 @@ describe Issuable do
expect(issuable_class.search('searchable issue')).to eq([searchable_issue])
end
it 'returns all issues with a query shorter than 3 chars' do
expect(issuable_class.search('zz')).to eq(issuable_class.all)
it 'returns issues with a matching title for a query shorter than 3 chars' do
expect(issuable_class.search(searchable_issue2.title.downcase)).to eq([searchable_issue2])
end
end
@ -95,6 +96,7 @@ describe Issuable do
let!(:searchable_issue) do
create(:issue, title: "Searchable awesome issue", description: 'Many cute kittens')
end
let!(:searchable_issue2) { create(:issue, title: "Aw", description: "Cu") }
it 'returns issues with a matching title' do
expect(issuable_class.full_search(searchable_issue.title))
@ -133,8 +135,8 @@ describe Issuable do
expect(issuable_class.full_search('many kittens')).to eq([searchable_issue])
end
it 'returns all issues with a query shorter than 3 chars' do
expect(issuable_class.search('zz')).to eq(issuable_class.all)
it 'returns issues with a matching description for a query shorter than 3 chars' do
expect(issuable_class.full_search(searchable_issue2.description.downcase)).to eq([searchable_issue2])
end
end

View file

@ -88,7 +88,7 @@ describe Snippet do
end
describe '.search' do
let(:snippet) { create(:snippet) }
let(:snippet) { create(:snippet, title: 'test snippet') }
it 'returns snippets with a matching title' do
expect(described_class.search(snippet.title)).to eq([snippet])