From 4f04c4c90b2db8ddcf5f3e28a9bbefd20c8bbda0 Mon Sep 17 00:00:00 2001 From: Mario de la Ossa Date: Wed, 12 Jun 2019 18:08:44 -0600 Subject: [PATCH] Ignore min_chars_for_partial_matching unles trigrm If we're not using a trigram index, then ignore the min_chars_for_partial_matching setting --- app/finders/issuable_finder.rb | 2 +- app/models/concerns/issuable.rb | 4 ++-- .../unreleased/40379-CJK-search-min-chars.yml | 5 ++++ lib/gitlab/sql/pattern.rb | 24 +++++++++++-------- spec/lib/gitlab/sql/pattern_spec.rb | 12 ++++++++++ spec/models/concerns/issuable_spec.rb | 10 ++++++++ 6 files changed, 44 insertions(+), 13 deletions(-) create mode 100644 changelogs/unreleased/40379-CJK-search-min-chars.yml diff --git a/app/finders/issuable_finder.rb b/app/finders/issuable_finder.rb index 3592505a977..f4fbeacfaba 100644 --- a/app/finders/issuable_finder.rb +++ b/app/finders/issuable_finder.rb @@ -429,7 +429,7 @@ class IssuableFinder items = klass.with(cte.to_arel).from(klass.table_name) end - items.full_search(search, matched_columns: params[:in]) + items.full_search(search, matched_columns: params[:in], use_minimum_char_limit: !use_cte_for_search?) end # rubocop: enable CodeReuse/ActiveRecord diff --git a/app/models/concerns/issuable.rb b/app/models/concerns/issuable.rb index 299e413321d..952de92cae1 100644 --- a/app/models/concerns/issuable.rb +++ b/app/models/concerns/issuable.rb @@ -168,7 +168,7 @@ module Issuable # matched_columns - Modify the scope of the query. 'title', 'description' or joining them with a comma. # # Returns an ActiveRecord::Relation. - def full_search(query, matched_columns: 'title,description') + def full_search(query, matched_columns: 'title,description', use_minimum_char_limit: true) allowed_columns = [:title, :description] matched_columns = matched_columns.to_s.split(',').map(&:to_sym) matched_columns &= allowed_columns @@ -176,7 +176,7 @@ module Issuable # Matching title or description if the matched_columns did not contain any allowed columns. matched_columns = [:title, :description] if matched_columns.empty? - fuzzy_search(query, matched_columns) + fuzzy_search(query, matched_columns, use_minimum_char_limit: use_minimum_char_limit) end def simple_sorts diff --git a/changelogs/unreleased/40379-CJK-search-min-chars.yml b/changelogs/unreleased/40379-CJK-search-min-chars.yml new file mode 100644 index 00000000000..6f6c4df464f --- /dev/null +++ b/changelogs/unreleased/40379-CJK-search-min-chars.yml @@ -0,0 +1,5 @@ +--- +title: Remove minimum character limits for fuzzy searches when using a CTE +merge_request: 29810 +author: +type: fixed diff --git a/lib/gitlab/sql/pattern.rb b/lib/gitlab/sql/pattern.rb index fd108b4c124..f6edbfced7f 100644 --- a/lib/gitlab/sql/pattern.rb +++ b/lib/gitlab/sql/pattern.rb @@ -9,14 +9,16 @@ module Gitlab REGEX_QUOTED_WORD = /(?<=\A| )"[^"]+"(?= |\z)/.freeze class_methods do - def fuzzy_search(query, columns) - matches = columns.map { |col| fuzzy_arel_match(col, query) }.compact.reduce(:or) + def fuzzy_search(query, columns, use_minimum_char_limit: true) + matches = columns.map do |col| + fuzzy_arel_match(col, query, use_minimum_char_limit: use_minimum_char_limit) + end.compact.reduce(:or) where(matches) end - def to_pattern(query) - if partial_matching?(query) + def to_pattern(query, use_minimum_char_limit: true) + if partial_matching?(query, use_minimum_char_limit: use_minimum_char_limit) "%#{sanitize_sql_like(query)}%" else sanitize_sql_like(query) @@ -27,7 +29,9 @@ module Gitlab MIN_CHARS_FOR_PARTIAL_MATCHING end - def partial_matching?(query) + def partial_matching?(query, use_minimum_char_limit: true) + return true unless use_minimum_char_limit + query.length >= min_chars_for_partial_matching end @@ -35,14 +39,14 @@ module Gitlab # query - The text to search for. # lower_exact_match - When set to `true` we'll fall back to using # `LOWER(column) = query` instead of using `ILIKE`. - def fuzzy_arel_match(column, query, lower_exact_match: false) + def fuzzy_arel_match(column, query, lower_exact_match: false, use_minimum_char_limit: true) query = query.squish return unless query.present? - words = select_fuzzy_words(query) + words = select_fuzzy_words(query, use_minimum_char_limit: use_minimum_char_limit) if words.any? - words.map { |word| arel_table[column].matches(to_pattern(word)) }.reduce(:and) + words.map { |word| arel_table[column].matches(to_pattern(word, use_minimum_char_limit: use_minimum_char_limit)) }.reduce(:and) else # No words of at least 3 chars, but we can search for an exact # case insensitive match with the query as a whole @@ -56,7 +60,7 @@ module Gitlab end end - def select_fuzzy_words(query) + def select_fuzzy_words(query, use_minimum_char_limit: true) quoted_words = query.scan(REGEX_QUOTED_WORD) query = quoted_words.reduce(query) { |q, quoted_word| q.sub(quoted_word, '') } @@ -67,7 +71,7 @@ module Gitlab words.concat(quoted_words) - words.select { |word| partial_matching?(word) } + words.select { |word| partial_matching?(word, use_minimum_char_limit: use_minimum_char_limit) } end end end diff --git a/spec/lib/gitlab/sql/pattern_spec.rb b/spec/lib/gitlab/sql/pattern_spec.rb index 5b5052de372..98838712eae 100644 --- a/spec/lib/gitlab/sql/pattern_spec.rb +++ b/spec/lib/gitlab/sql/pattern_spec.rb @@ -10,6 +10,12 @@ describe Gitlab::SQL::Pattern do it 'returns exact matching pattern' do expect(to_pattern).to eq('12') end + + context 'and ignore_minimum_char_limit is true' do + it 'returns partial matching pattern' do + expect(User.to_pattern(query, use_minimum_char_limit: false)).to eq('%12%') + end + end end context 'when a query with a escape character is shorter than 3 chars' do @@ -18,6 +24,12 @@ describe Gitlab::SQL::Pattern do it 'returns sanitized exact matching pattern' do expect(to_pattern).to eq('\_2') end + + context 'and ignore_minimum_char_limit is true' do + it 'returns sanitized partial matching pattern' do + expect(User.to_pattern(query, use_minimum_char_limit: false)).to eq('%\_2%') + end + end end context 'when a query is equal to 3 chars' do diff --git a/spec/models/concerns/issuable_spec.rb b/spec/models/concerns/issuable_spec.rb index 64f02978d79..68224a56515 100644 --- a/spec/models/concerns/issuable_spec.rb +++ b/spec/models/concerns/issuable_spec.rb @@ -223,6 +223,16 @@ describe Issuable do expect(issuable_class.full_search(searchable_issue2.description.downcase)).to eq([searchable_issue2]) end + it 'returns issues with a fuzzy matching description for a query shorter than 3 chars if told to do so' do + search = searchable_issue2.description.downcase.scan(/\w+/).sample[-1] + + expect(issuable_class.full_search(search, use_minimum_char_limit: false)).to include(searchable_issue2) + end + + it 'returns issues with a fuzzy matching title for a query shorter than 3 chars if told to do so' do + expect(issuable_class.full_search('i', use_minimum_char_limit: false)).to include(searchable_issue) + end + context 'when matching columns is "title"' do it 'returns issues with a matching title' do expect(issuable_class.full_search(searchable_issue.title, matched_columns: 'title'))