Ignore min_chars_for_partial_matching unles trigrm

If we're not using a trigram index, then ignore the
min_chars_for_partial_matching setting
This commit is contained in:
Mario de la Ossa 2019-06-12 18:08:44 -06:00
parent 62e52ac6a8
commit 4f04c4c90b
No known key found for this signature in database
GPG key ID: 20CA8F4C6A20761B
6 changed files with 44 additions and 13 deletions

View file

@ -429,7 +429,7 @@ class IssuableFinder
items = klass.with(cte.to_arel).from(klass.table_name)
end
items.full_search(search, matched_columns: params[:in])
items.full_search(search, matched_columns: params[:in], use_minimum_char_limit: !use_cte_for_search?)
end
# rubocop: enable CodeReuse/ActiveRecord

View file

@ -168,7 +168,7 @@ module Issuable
# matched_columns - Modify the scope of the query. 'title', 'description' or joining them with a comma.
#
# Returns an ActiveRecord::Relation.
def full_search(query, matched_columns: 'title,description')
def full_search(query, matched_columns: 'title,description', use_minimum_char_limit: true)
allowed_columns = [:title, :description]
matched_columns = matched_columns.to_s.split(',').map(&:to_sym)
matched_columns &= allowed_columns
@ -176,7 +176,7 @@ module Issuable
# Matching title or description if the matched_columns did not contain any allowed columns.
matched_columns = [:title, :description] if matched_columns.empty?
fuzzy_search(query, matched_columns)
fuzzy_search(query, matched_columns, use_minimum_char_limit: use_minimum_char_limit)
end
def simple_sorts

View file

@ -0,0 +1,5 @@
---
title: Remove minimum character limits for fuzzy searches when using a CTE
merge_request: 29810
author:
type: fixed

View file

@ -9,14 +9,16 @@ module Gitlab
REGEX_QUOTED_WORD = /(?<=\A| )"[^"]+"(?= |\z)/.freeze
class_methods do
def fuzzy_search(query, columns)
matches = columns.map { |col| fuzzy_arel_match(col, query) }.compact.reduce(:or)
def fuzzy_search(query, columns, use_minimum_char_limit: true)
matches = columns.map do |col|
fuzzy_arel_match(col, query, use_minimum_char_limit: use_minimum_char_limit)
end.compact.reduce(:or)
where(matches)
end
def to_pattern(query)
if partial_matching?(query)
def to_pattern(query, use_minimum_char_limit: true)
if partial_matching?(query, use_minimum_char_limit: use_minimum_char_limit)
"%#{sanitize_sql_like(query)}%"
else
sanitize_sql_like(query)
@ -27,7 +29,9 @@ module Gitlab
MIN_CHARS_FOR_PARTIAL_MATCHING
end
def partial_matching?(query)
def partial_matching?(query, use_minimum_char_limit: true)
return true unless use_minimum_char_limit
query.length >= min_chars_for_partial_matching
end
@ -35,14 +39,14 @@ module Gitlab
# query - The text to search for.
# lower_exact_match - When set to `true` we'll fall back to using
# `LOWER(column) = query` instead of using `ILIKE`.
def fuzzy_arel_match(column, query, lower_exact_match: false)
def fuzzy_arel_match(column, query, lower_exact_match: false, use_minimum_char_limit: true)
query = query.squish
return unless query.present?
words = select_fuzzy_words(query)
words = select_fuzzy_words(query, use_minimum_char_limit: use_minimum_char_limit)
if words.any?
words.map { |word| arel_table[column].matches(to_pattern(word)) }.reduce(:and)
words.map { |word| arel_table[column].matches(to_pattern(word, use_minimum_char_limit: use_minimum_char_limit)) }.reduce(:and)
else
# No words of at least 3 chars, but we can search for an exact
# case insensitive match with the query as a whole
@ -56,7 +60,7 @@ module Gitlab
end
end
def select_fuzzy_words(query)
def select_fuzzy_words(query, use_minimum_char_limit: true)
quoted_words = query.scan(REGEX_QUOTED_WORD)
query = quoted_words.reduce(query) { |q, quoted_word| q.sub(quoted_word, '') }
@ -67,7 +71,7 @@ module Gitlab
words.concat(quoted_words)
words.select { |word| partial_matching?(word) }
words.select { |word| partial_matching?(word, use_minimum_char_limit: use_minimum_char_limit) }
end
end
end

View file

@ -10,6 +10,12 @@ describe Gitlab::SQL::Pattern do
it 'returns exact matching pattern' do
expect(to_pattern).to eq('12')
end
context 'and ignore_minimum_char_limit is true' do
it 'returns partial matching pattern' do
expect(User.to_pattern(query, use_minimum_char_limit: false)).to eq('%12%')
end
end
end
context 'when a query with a escape character is shorter than 3 chars' do
@ -18,6 +24,12 @@ describe Gitlab::SQL::Pattern do
it 'returns sanitized exact matching pattern' do
expect(to_pattern).to eq('\_2')
end
context 'and ignore_minimum_char_limit is true' do
it 'returns sanitized partial matching pattern' do
expect(User.to_pattern(query, use_minimum_char_limit: false)).to eq('%\_2%')
end
end
end
context 'when a query is equal to 3 chars' do

View file

@ -223,6 +223,16 @@ describe Issuable do
expect(issuable_class.full_search(searchable_issue2.description.downcase)).to eq([searchable_issue2])
end
it 'returns issues with a fuzzy matching description for a query shorter than 3 chars if told to do so' do
search = searchable_issue2.description.downcase.scan(/\w+/).sample[-1]
expect(issuable_class.full_search(search, use_minimum_char_limit: false)).to include(searchable_issue2)
end
it 'returns issues with a fuzzy matching title for a query shorter than 3 chars if told to do so' do
expect(issuable_class.full_search('i', use_minimum_char_limit: false)).to include(searchable_issue)
end
context 'when matching columns is "title"' do
it 'returns issues with a matching title' do
expect(issuable_class.full_search(searchable_issue.title, matched_columns: 'title'))