Ignore min_chars_for_partial_matching unless trigram
If we're not using a trigram index, ignore the min_chars_for_partial_matching setting.
This commit is contained in:
parent
62e52ac6a8
commit
4f04c4c90b
6 changed files with 44 additions and 13 deletions
|
@ -429,7 +429,7 @@ class IssuableFinder
|
|||
items = klass.with(cte.to_arel).from(klass.table_name)
|
||||
end
|
||||
|
||||
items.full_search(search, matched_columns: params[:in])
|
||||
items.full_search(search, matched_columns: params[:in], use_minimum_char_limit: !use_cte_for_search?)
|
||||
end
|
||||
# rubocop: enable CodeReuse/ActiveRecord
|
||||
|
||||
|
|
|
@ -168,7 +168,7 @@ module Issuable
|
|||
# matched_columns - Modify the scope of the query. 'title', 'description' or joining them with a comma.
|
||||
#
|
||||
# Returns an ActiveRecord::Relation.
|
||||
def full_search(query, matched_columns: 'title,description')
|
||||
def full_search(query, matched_columns: 'title,description', use_minimum_char_limit: true)
|
||||
allowed_columns = [:title, :description]
|
||||
matched_columns = matched_columns.to_s.split(',').map(&:to_sym)
|
||||
matched_columns &= allowed_columns
|
||||
|
@ -176,7 +176,7 @@ module Issuable
|
|||
# Matching title or description if the matched_columns did not contain any allowed columns.
|
||||
matched_columns = [:title, :description] if matched_columns.empty?
|
||||
|
||||
fuzzy_search(query, matched_columns)
|
||||
fuzzy_search(query, matched_columns, use_minimum_char_limit: use_minimum_char_limit)
|
||||
end
|
||||
|
||||
def simple_sorts
|
||||
|
|
5
changelogs/unreleased/40379-CJK-search-min-chars.yml
Normal file
5
changelogs/unreleased/40379-CJK-search-min-chars.yml
Normal file
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Remove minimum character limits for fuzzy searches when using a CTE
|
||||
merge_request: 29810
|
||||
author:
|
||||
type: fixed
|
|
@ -9,14 +9,16 @@ module Gitlab
|
|||
REGEX_QUOTED_WORD = /(?<=\A| )"[^"]+"(?= |\z)/.freeze
|
||||
|
||||
class_methods do
|
||||
def fuzzy_search(query, columns)
|
||||
matches = columns.map { |col| fuzzy_arel_match(col, query) }.compact.reduce(:or)
|
||||
def fuzzy_search(query, columns, use_minimum_char_limit: true)
|
||||
matches = columns.map do |col|
|
||||
fuzzy_arel_match(col, query, use_minimum_char_limit: use_minimum_char_limit)
|
||||
end.compact.reduce(:or)
|
||||
|
||||
where(matches)
|
||||
end
|
||||
|
||||
def to_pattern(query)
|
||||
if partial_matching?(query)
|
||||
def to_pattern(query, use_minimum_char_limit: true)
|
||||
if partial_matching?(query, use_minimum_char_limit: use_minimum_char_limit)
|
||||
"%#{sanitize_sql_like(query)}%"
|
||||
else
|
||||
sanitize_sql_like(query)
|
||||
|
@ -27,7 +29,9 @@ module Gitlab
|
|||
MIN_CHARS_FOR_PARTIAL_MATCHING
|
||||
end
|
||||
|
||||
def partial_matching?(query)
|
||||
def partial_matching?(query, use_minimum_char_limit: true)
|
||||
return true unless use_minimum_char_limit
|
||||
|
||||
query.length >= min_chars_for_partial_matching
|
||||
end
|
||||
|
||||
|
@ -35,14 +39,14 @@ module Gitlab
|
|||
# query - The text to search for.
|
||||
# lower_exact_match - When set to `true` we'll fall back to using
|
||||
# `LOWER(column) = query` instead of using `ILIKE`.
|
||||
def fuzzy_arel_match(column, query, lower_exact_match: false)
|
||||
def fuzzy_arel_match(column, query, lower_exact_match: false, use_minimum_char_limit: true)
|
||||
query = query.squish
|
||||
return unless query.present?
|
||||
|
||||
words = select_fuzzy_words(query)
|
||||
words = select_fuzzy_words(query, use_minimum_char_limit: use_minimum_char_limit)
|
||||
|
||||
if words.any?
|
||||
words.map { |word| arel_table[column].matches(to_pattern(word)) }.reduce(:and)
|
||||
words.map { |word| arel_table[column].matches(to_pattern(word, use_minimum_char_limit: use_minimum_char_limit)) }.reduce(:and)
|
||||
else
|
||||
# No words of at least 3 chars, but we can search for an exact
|
||||
# case insensitive match with the query as a whole
|
||||
|
@ -56,7 +60,7 @@ module Gitlab
|
|||
end
|
||||
end
|
||||
|
||||
def select_fuzzy_words(query)
|
||||
def select_fuzzy_words(query, use_minimum_char_limit: true)
|
||||
quoted_words = query.scan(REGEX_QUOTED_WORD)
|
||||
|
||||
query = quoted_words.reduce(query) { |q, quoted_word| q.sub(quoted_word, '') }
|
||||
|
@ -67,7 +71,7 @@ module Gitlab
|
|||
|
||||
words.concat(quoted_words)
|
||||
|
||||
words.select { |word| partial_matching?(word) }
|
||||
words.select { |word| partial_matching?(word, use_minimum_char_limit: use_minimum_char_limit) }
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -10,6 +10,12 @@ describe Gitlab::SQL::Pattern do
|
|||
it 'returns exact matching pattern' do
|
||||
expect(to_pattern).to eq('12')
|
||||
end
|
||||
|
||||
context 'and ignore_minimum_char_limit is true' do
|
||||
it 'returns partial matching pattern' do
|
||||
expect(User.to_pattern(query, use_minimum_char_limit: false)).to eq('%12%')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'when a query with a escape character is shorter than 3 chars' do
|
||||
|
@ -18,6 +24,12 @@ describe Gitlab::SQL::Pattern do
|
|||
it 'returns sanitized exact matching pattern' do
|
||||
expect(to_pattern).to eq('\_2')
|
||||
end
|
||||
|
||||
context 'and ignore_minimum_char_limit is true' do
|
||||
it 'returns sanitized partial matching pattern' do
|
||||
expect(User.to_pattern(query, use_minimum_char_limit: false)).to eq('%\_2%')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'when a query is equal to 3 chars' do
|
||||
|
|
|
@ -223,6 +223,16 @@ describe Issuable do
|
|||
expect(issuable_class.full_search(searchable_issue2.description.downcase)).to eq([searchable_issue2])
|
||||
end
|
||||
|
||||
it 'returns issues with a fuzzy matching description for a query shorter than 3 chars if told to do so' do
|
||||
search = searchable_issue2.description.downcase.scan(/\w+/).sample[-1]
|
||||
|
||||
expect(issuable_class.full_search(search, use_minimum_char_limit: false)).to include(searchable_issue2)
|
||||
end
|
||||
|
||||
it 'returns issues with a fuzzy matching title for a query shorter than 3 chars if told to do so' do
|
||||
expect(issuable_class.full_search('i', use_minimum_char_limit: false)).to include(searchable_issue)
|
||||
end
|
||||
|
||||
context 'when matching columns is "title"' do
|
||||
it 'returns issues with a matching title' do
|
||||
expect(issuable_class.full_search(searchable_issue.title, matched_columns: 'title'))
|
||||
|
|
Loading…
Reference in a new issue