41bfe82b7a
This optimises searching for users when using queries consisting out of one or two characters such as "ab". We optimise such cases by searching for `LOWER(name)` and `LOWER(username)` instead of using `ILIKE`. Using `LOWER` produces a _much_ better performing query. For example, when searching for all users matching the term "a" we'd produce the following plan: Limit (cost=637.69..637.74 rows=20 width=805) (actual time=41.983..41.995 rows=20 loops=1) Buffers: shared hit=8330 -> Sort (cost=637.69..638.61 rows=368 width=805) (actual time=41.982..41.990 rows=20 loops=1) Sort Key: (CASE WHEN ((name)::text = 'a'::text) THEN 0 WHEN ((username)::text = 'a'::text) THEN 1 WHEN ((email)::text = 'a'::text) THEN 2 ELSE 3 END), name Sort Method: top-N heapsort Memory: 35kB Buffers: shared hit=8330 -> Bitmap Heap Scan on users (cost=75.47..627.89 rows=368 width=805) (actual time=9.452..41.305 rows=277 loops=1) Recheck Cond: (((name)::text ~~* 'a'::text) OR ((username)::text ~~* 'a'::text) OR ((email)::text = 'a'::text)) Rows Removed by Index Recheck: 7601 Heap Blocks: exact=7636 Buffers: shared hit=8327 -> BitmapOr (cost=75.47..75.47 rows=368 width=0) (actual time=8.290..8.290 rows=0 loops=1) Buffers: shared hit=691 -> Bitmap Index Scan on index_users_on_name_trigram (cost=0.00..38.85 rows=180 width=0) (actual time=4.369..4.369 rows=4071 loops=1) Index Cond: ((name)::text ~~* 'a'::text) Buffers: shared hit=360 -> Bitmap Index Scan on index_users_on_username_trigram (cost=0.00..34.41 rows=188 width=0) (actual time=3.896..3.896 rows=4140 loops=1) Index Cond: ((username)::text ~~* 'a'::text) Buffers: shared hit=328 -> Bitmap Index Scan on users_email_key (cost=0.00..1.94 rows=1 width=0) (actual time=0.022..0.022 rows=0 loops=1) Index Cond: ((email)::text = 'a'::text) Buffers: shared hit=3 Planning time: 3.912 ms Execution time: 42.171 ms With the changes in this commit we now produce the following plan instead: Limit (cost=13257.48..13257.53 rows=20 width=805) (actual time=1.567..1.579 rows=20 loops=1) Buffers: shared hit=287 -> Sort (cost=13257.48..13280.93 rows=9379 width=805) (actual time=1.567..1.572 rows=20 loops=1) Sort Key: (CASE WHEN ((name)::text = 'a'::text) THEN 0 WHEN ((username)::text = 'a'::text) THEN 1 WHEN ((email)::text = 'a'::text) THEN 2 ELSE 3 END), name Sort Method: top-N heapsort Memory: 35kB Buffers: shared hit=287 -> Bitmap Heap Scan on users (cost=135.66..13007.91 rows=9379 width=805) (actual time=0.194..1.107 rows=277 loops=1) Recheck Cond: ((lower((name)::text) = 'a'::text) OR (lower((username)::text) = 'a'::text) OR ((email)::text = 'a'::text)) Heap Blocks: exact=277 Buffers: shared hit=287 -> BitmapOr (cost=135.66..135.66 rows=9379 width=0) (actual time=0.152..0.152 rows=0 loops=1) Buffers: shared hit=10 -> Bitmap Index Scan on yorick_test_users (cost=0.00..124.75 rows=9377 width=0) (actual time=0.101..0.101 rows=277 loops=1) Index Cond: (lower((name)::text) = 'a'::text) Buffers: shared hit=4 -> Bitmap Index Scan on index_on_users_lower_username (cost=0.00..1.94 rows=1 width=0) (actual time=0.035..0.035 rows=1 loops=1) Index Cond: (lower((username)::text) = 'a'::text) Buffers: shared hit=3 -> Bitmap Index Scan on users_email_key (cost=0.00..1.94 rows=1 width=0) (actual time=0.014..0.014 rows=0 loops=1) Index Cond: ((email)::text = 'a'::text) Buffers: shared hit=3 Planning time: 0.303 ms Execution time: 1.687 ms Here we can see the new query is 25 times faster compared to the old query.
197 lines
5.7 KiB
Ruby
197 lines
5.7 KiB
Ruby
require 'spec_helper'
|
|
|
|
describe Gitlab::SQL::Pattern do
|
|
describe '.to_pattern' do
|
|
subject(:to_pattern) { User.to_pattern(query) }
|
|
|
|
context 'when a query is shorter than 3 chars' do
|
|
let(:query) { '12' }
|
|
|
|
it 'returns exact matching pattern' do
|
|
expect(to_pattern).to eq('12')
|
|
end
|
|
end
|
|
|
|
context 'when a query with a escape character is shorter than 3 chars' do
|
|
let(:query) { '_2' }
|
|
|
|
it 'returns sanitized exact matching pattern' do
|
|
expect(to_pattern).to eq('\_2')
|
|
end
|
|
end
|
|
|
|
context 'when a query is equal to 3 chars' do
|
|
let(:query) { '123' }
|
|
|
|
it 'returns partial matching pattern' do
|
|
expect(to_pattern).to eq('%123%')
|
|
end
|
|
end
|
|
|
|
context 'when a query with a escape character is equal to 3 chars' do
|
|
let(:query) { '_23' }
|
|
|
|
it 'returns partial matching pattern' do
|
|
expect(to_pattern).to eq('%\_23%')
|
|
end
|
|
end
|
|
|
|
context 'when a query is longer than 3 chars' do
|
|
let(:query) { '1234' }
|
|
|
|
it 'returns partial matching pattern' do
|
|
expect(to_pattern).to eq('%1234%')
|
|
end
|
|
end
|
|
|
|
context 'when a query with a escape character is longer than 3 chars' do
|
|
let(:query) { '_234' }
|
|
|
|
it 'returns sanitized partial matching pattern' do
|
|
expect(to_pattern).to eq('%\_234%')
|
|
end
|
|
end
|
|
end
|
|
|
|
describe '.select_fuzzy_words' do
|
|
subject(:select_fuzzy_words) { Issue.select_fuzzy_words(query) }
|
|
|
|
context 'with a word equal to 3 chars' do
|
|
let(:query) { 'foo' }
|
|
|
|
it 'returns array cotaining a word' do
|
|
expect(select_fuzzy_words).to match_array(['foo'])
|
|
end
|
|
end
|
|
|
|
context 'with a word shorter than 3 chars' do
|
|
let(:query) { 'fo' }
|
|
|
|
it 'returns empty array' do
|
|
expect(select_fuzzy_words).to match_array([])
|
|
end
|
|
end
|
|
|
|
context 'with two words both equal to 3 chars' do
|
|
let(:query) { 'foo baz' }
|
|
|
|
it 'returns array containing two words' do
|
|
expect(select_fuzzy_words).to match_array(%w[foo baz])
|
|
end
|
|
end
|
|
|
|
context 'with two words divided by two spaces both equal to 3 chars' do
|
|
let(:query) { 'foo baz' }
|
|
|
|
it 'returns array containing two words' do
|
|
expect(select_fuzzy_words).to match_array(%w[foo baz])
|
|
end
|
|
end
|
|
|
|
context 'with two words equal to 3 chars and shorter than 3 chars' do
|
|
let(:query) { 'foo ba' }
|
|
|
|
it 'returns array containing a word' do
|
|
expect(select_fuzzy_words).to match_array(['foo'])
|
|
end
|
|
end
|
|
|
|
context 'with a multi-word surrounded by double quote' do
|
|
let(:query) { '"really bar"' }
|
|
|
|
it 'returns array containing a multi-word' do
|
|
expect(select_fuzzy_words).to match_array(['really bar'])
|
|
end
|
|
end
|
|
|
|
context 'with a multi-word surrounded by double quote and two words' do
|
|
let(:query) { 'foo "really bar" baz' }
|
|
|
|
it 'returns array containing a multi-word and tow words' do
|
|
expect(select_fuzzy_words).to match_array(['foo', 'really bar', 'baz'])
|
|
end
|
|
end
|
|
|
|
context 'with a multi-word surrounded by double quote missing a spece before the first double quote' do
|
|
let(:query) { 'foo"really bar"' }
|
|
|
|
it 'returns array containing two words with double quote' do
|
|
expect(select_fuzzy_words).to match_array(['foo"really', 'bar"'])
|
|
end
|
|
end
|
|
|
|
context 'with a multi-word surrounded by double quote missing a spece after the second double quote' do
|
|
let(:query) { '"really bar"baz' }
|
|
|
|
it 'returns array containing two words with double quote' do
|
|
expect(select_fuzzy_words).to match_array(['"really', 'bar"baz'])
|
|
end
|
|
end
|
|
|
|
context 'with two multi-word surrounded by double quote and two words' do
|
|
let(:query) { 'foo "really bar" baz "awesome feature"' }
|
|
|
|
it 'returns array containing two multi-words and tow words' do
|
|
expect(select_fuzzy_words).to match_array(['foo', 'really bar', 'baz', 'awesome feature'])
|
|
end
|
|
end
|
|
end
|
|
|
|
describe '.fuzzy_arel_match' do
|
|
subject(:fuzzy_arel_match) { Issue.fuzzy_arel_match(:title, query) }
|
|
|
|
context 'with a word equal to 3 chars' do
|
|
let(:query) { 'foo' }
|
|
|
|
it 'returns a single ILIKE condition' do
|
|
expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE '\%foo\%'/)
|
|
end
|
|
end
|
|
|
|
context 'with a word shorter than 3 chars' do
|
|
let(:query) { 'fo' }
|
|
|
|
it 'returns a single equality condition' do
|
|
expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE 'fo'/)
|
|
end
|
|
|
|
it 'uses LOWER instead of ILIKE when LOWER is enabled' do
|
|
rel = Issue.fuzzy_arel_match(:title, query, lower_exact_match: true)
|
|
|
|
expect(rel.to_sql).to match(/LOWER\(.*title.*\).*=.*'fo'/)
|
|
end
|
|
end
|
|
|
|
context 'with two words both equal to 3 chars' do
|
|
let(:query) { 'foo baz' }
|
|
|
|
it 'returns a joining LIKE condition using a AND' do
|
|
expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%'/)
|
|
end
|
|
end
|
|
|
|
context 'with two words both shorter than 3 chars' do
|
|
let(:query) { 'fo ba' }
|
|
|
|
it 'returns a single ILIKE condition' do
|
|
expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE 'fo ba'/)
|
|
end
|
|
end
|
|
|
|
context 'with two words, one shorter 3 chars' do
|
|
let(:query) { 'foo ba' }
|
|
|
|
it 'returns a single ILIKE condition using the longer word' do
|
|
expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%'/)
|
|
end
|
|
end
|
|
|
|
context 'with a multi-word surrounded by double quote and two words' do
|
|
let(:query) { 'foo "really bar" baz' }
|
|
|
|
it 'returns a joining LIKE condition using a AND' do
|
|
expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%' AND .*title.*I?LIKE '\%really bar\%'/)
|
|
end
|
|
end
|
|
end
|
|
end
|