gitlab-org--gitlab-foss/spec/benchmarks/models/user_spec.rb

79 lines
1.9 KiB
Ruby
Raw Normal View History

require 'spec_helper'
describe User, benchmark: true do
describe '.all' do
before do
10.times { create(:user) }
end
benchmark_subject { User.all.to_a }
it { is_expected.to iterate_per_second(500) }
end
describe '.by_login' do
before do
%w{Alice Bob Eve}.each do |name|
create(:user,
email: "#{name}@gitlab.com",
username: name,
name: name)
end
end
# The iteration count is based on the query taking little over 1 ms when
# using PostgreSQL.
let(:iterations) { 900 }
describe 'using a capitalized username' do
benchmark_subject { User.by_login('Alice') }
it { is_expected.to iterate_per_second(iterations) }
end
describe 'using a lowercase username' do
benchmark_subject { User.by_login('alice') }
it { is_expected.to iterate_per_second(iterations) }
end
describe 'using a capitalized Email address' do
benchmark_subject { User.by_login('Alice@gitlab.com') }
it { is_expected.to iterate_per_second(iterations) }
end
describe 'using a lowercase Email address' do
benchmark_subject { User.by_login('alice@gitlab.com') }
it { is_expected.to iterate_per_second(iterations) }
end
end
Improve performance of User.find_by_any_email This query used to rely on a JOIN, effectively producing the following SQL: SELECT users.* FROM users LEFT OUTER JOIN emails ON emails.user_id = users.id WHERE (users.email = X OR emails.email = X) LIMIT 1; The use of a JOIN means having to scan over all Emails and users, join them together and then filter out the rows that don't match the criteria (though this step may be taken into account already when joining). In the new setup this query instead uses a sub-query, producing the following SQL: SELECT * FROM users WHERE id IN (select user_id FROM emails WHERE email = X) OR email = X LIMIT 1; This query has the benefit that it: 1. Doesn't have to JOIN any rows 2. Only has to operate on a relatively small set of rows from the "emails" table. Since most users will only have a handful of Emails associated (certainly not hundreds or even thousands) the size of the set returned by the sub-query is small enough that it should not become problematic. Performance of the old versus new version can be measured using the following benchmark: # Save this in ./bench.rb require 'benchmark/ips' email = 'yorick@gitlab.com' def User.find_by_any_email_old(email) user_table = arel_table email_table = Email.arel_table query = user_table. project(user_table[Arel.star]). join(email_table, Arel::Nodes::OuterJoin). on(user_table[:id].eq(email_table[:user_id])). where(user_table[:email].eq(email).or(email_table[:email].eq(email))) find_by_sql(query.to_sql).first end Benchmark.ips do |bench| bench.report 'original' do User.find_by_any_email_old(email) end bench.report 'optimized' do User.find_by_any_email(email) end bench.compare! end Running this locally using "bundle exec rails r bench.rb" produces the following output: Calculating ------------------------------------- original 1.000 i/100ms optimized 93.000 i/100ms ------------------------------------------------- original 11.103 (± 0.0%) i/s - 56.000 optimized 948.713 (± 5.3%) i/s - 4.743k Comparison: optimized: 948.7 i/s original: 11.1 i/s - 85.45x slower In other words, the new setup is 85x faster compared to the old setup, at least when running this benchmark locally. For GitLab.com these improvements result in User.find_by_any_email taking only ~170 ms to run, instead of around 800 ms. While this is "only" an improvement of about 4.5 times (instead of 85x) it's still significantly better than before. Fixes #3242
2015-10-28 13:43:27 +00:00
describe '.find_by_any_email' do
let(:user) { create(:user) }
describe 'using a user with only a single Email address' do
let(:email) { user.email }
benchmark_subject { User.find_by_any_email(email) }
it { is_expected.to iterate_per_second(1000) }
Improve performance of User.find_by_any_email This query used to rely on a JOIN, effectively producing the following SQL: SELECT users.* FROM users LEFT OUTER JOIN emails ON emails.user_id = users.id WHERE (users.email = X OR emails.email = X) LIMIT 1; The use of a JOIN means having to scan over all Emails and users, join them together and then filter out the rows that don't match the criteria (though this step may be taken into account already when joining). In the new setup this query instead uses a sub-query, producing the following SQL: SELECT * FROM users WHERE id IN (select user_id FROM emails WHERE email = X) OR email = X LIMIT 1; This query has the benefit that it: 1. Doesn't have to JOIN any rows 2. Only has to operate on a relatively small set of rows from the "emails" table. Since most users will only have a handful of Emails associated (certainly not hundreds or even thousands) the size of the set returned by the sub-query is small enough that it should not become problematic. Performance of the old versus new version can be measured using the following benchmark: # Save this in ./bench.rb require 'benchmark/ips' email = 'yorick@gitlab.com' def User.find_by_any_email_old(email) user_table = arel_table email_table = Email.arel_table query = user_table. project(user_table[Arel.star]). join(email_table, Arel::Nodes::OuterJoin). on(user_table[:id].eq(email_table[:user_id])). where(user_table[:email].eq(email).or(email_table[:email].eq(email))) find_by_sql(query.to_sql).first end Benchmark.ips do |bench| bench.report 'original' do User.find_by_any_email_old(email) end bench.report 'optimized' do User.find_by_any_email(email) end bench.compare! end Running this locally using "bundle exec rails r bench.rb" produces the following output: Calculating ------------------------------------- original 1.000 i/100ms optimized 93.000 i/100ms ------------------------------------------------- original 11.103 (± 0.0%) i/s - 56.000 optimized 948.713 (± 5.3%) i/s - 4.743k Comparison: optimized: 948.7 i/s original: 11.1 i/s - 85.45x slower In other words, the new setup is 85x faster compared to the old setup, at least when running this benchmark locally. For GitLab.com these improvements result in User.find_by_any_email taking only ~170 ms to run, instead of around 800 ms. While this is "only" an improvement of about 4.5 times (instead of 85x) it's still significantly better than before. Fixes #3242
2015-10-28 13:43:27 +00:00
end
describe 'using a user with multiple Email addresses' do
let(:email) { user.emails.first.email }
benchmark_subject { User.find_by_any_email(email) }
before do
10.times do
user.emails.create(email: FFaker::Internet.email)
end
end
it { is_expected.to iterate_per_second(1000) }
Improve performance of User.find_by_any_email This query used to rely on a JOIN, effectively producing the following SQL: SELECT users.* FROM users LEFT OUTER JOIN emails ON emails.user_id = users.id WHERE (users.email = X OR emails.email = X) LIMIT 1; The use of a JOIN means having to scan over all Emails and users, join them together and then filter out the rows that don't match the criteria (though this step may be taken into account already when joining). In the new setup this query instead uses a sub-query, producing the following SQL: SELECT * FROM users WHERE id IN (select user_id FROM emails WHERE email = X) OR email = X LIMIT 1; This query has the benefit that it: 1. Doesn't have to JOIN any rows 2. Only has to operate on a relatively small set of rows from the "emails" table. Since most users will only have a handful of Emails associated (certainly not hundreds or even thousands) the size of the set returned by the sub-query is small enough that it should not become problematic. Performance of the old versus new version can be measured using the following benchmark: # Save this in ./bench.rb require 'benchmark/ips' email = 'yorick@gitlab.com' def User.find_by_any_email_old(email) user_table = arel_table email_table = Email.arel_table query = user_table. project(user_table[Arel.star]). join(email_table, Arel::Nodes::OuterJoin). on(user_table[:id].eq(email_table[:user_id])). where(user_table[:email].eq(email).or(email_table[:email].eq(email))) find_by_sql(query.to_sql).first end Benchmark.ips do |bench| bench.report 'original' do User.find_by_any_email_old(email) end bench.report 'optimized' do User.find_by_any_email(email) end bench.compare! end Running this locally using "bundle exec rails r bench.rb" produces the following output: Calculating ------------------------------------- original 1.000 i/100ms optimized 93.000 i/100ms ------------------------------------------------- original 11.103 (± 0.0%) i/s - 56.000 optimized 948.713 (± 5.3%) i/s - 4.743k Comparison: optimized: 948.7 i/s original: 11.1 i/s - 85.45x slower In other words, the new setup is 85x faster compared to the old setup, at least when running this benchmark locally. For GitLab.com these improvements result in User.find_by_any_email taking only ~170 ms to run, instead of around 800 ms. While this is "only" an improvement of about 4.5 times (instead of 85x) it's still significantly better than before. Fixes #3242
2015-10-28 13:43:27 +00:00
end
end
end