Parallelize tests only when overhead is justified

Parallelizing tests has a cost in terms of database setup and fixture
loading. This change makes Rails disable parallelization when the number
of tests is below a configurable threshold.

When running tests in parallel each process gets its own database
instance. On each execution, each process will update each database
schema (if needed) and load all the fixtures. This can be very expensive
for non trivial datasets.

As an example, for HEY, when running a single file with 18 tests,
running tests in parallel in my box adds an overhead of 13 seconds
versus not parallelizing them. Of course parallelizing is totally worthy
when there are many tests to run, but not when running just a few tests.

The threshold is configurable via
config.active_support.test_parallelization_minimum_number_of_tests,
which is 30 50 by default.

This also adds some tracing to know how tests are being executed:

When in parallel:

```
Running 2829 tests in parallel in 8 processes
```

When not in parallel:

```
Running 15 tests in a single process (parallelization threshold is 30)
```
This commit is contained in:
Jorge Manrubia 2021-07-12 17:37:52 +02:00
parent d364cfb34e
commit ecc5afed30
8 changed files with 140 additions and 15 deletions

View File

@ -1,3 +1,17 @@
* Parallelize tests only when overhead is justified by the number of them
Running tests in parallel adds overhead in terms of database
setup and fixture loading. Now, Rails will only parallelize test executions when
there are enough tests to make it worth it.
This threshold is 50 by default, and is configurable via:
```ruby
config.active_support.test_parallelization_minimum_number_of_tests = 100
```
*Jorge Manrubia*
* OpenSSL constants are now used for Digest computations.
*Dirkjan Bussink*

View File

@ -88,6 +88,7 @@ module ActiveSupport
cattr_accessor :test_order # :nodoc:
cattr_accessor :test_parallelization_disabled, default: false # :nodoc:
cattr_accessor :test_parallelization_minimum_number_of_tests, default: 50 # :nodoc:
def self.disable_test_parallelization!
self.test_parallelization_disabled = true unless ENV["PARALLEL_WORKERS"]

View File

@ -12,6 +12,7 @@ require "active_support/testing/constant_lookup"
require "active_support/testing/time_helpers"
require "active_support/testing/file_fixtures"
require "active_support/testing/parallelization"
require "active_support/testing/parallelize_executor"
require "concurrent/utility/processor_counter"
module ActiveSupport
@ -77,20 +78,7 @@ module ActiveSupport
return if workers <= 1 || ActiveSupport.test_parallelization_disabled
executor = case with
when :processes
Testing::Parallelization.new(workers)
when :threads
Minitest::Parallel::Executor.new(workers)
else
raise ArgumentError, "#{with} is not a supported parallelization executor."
end
self.lock_threads = false if defined?(self.lock_threads) && with == :threads
Minitest.parallel_executor = executor
parallelize_me!
Minitest.parallel_executor = ActiveSupport::Testing::ParallelizeExecutor.new(size: workers, with: with)
end
# Set up hook for parallel testing. This can be used if you have multiple

View File

@ -42,6 +42,10 @@ module ActiveSupport
@queue_server << work
end
def size
@worker_count
end
def shutdown
@queue_server.shutdown
@worker_pool.each { |pid| Process.waitpid pid }

View File

@ -0,0 +1,72 @@
# frozen_string_literal: true
module ActiveSupport
module Testing
class ParallelizeExecutor # :nodoc:
attr_reader :size, :parallelize_with, :parallel_executor
def initialize(size:, with:)
@size = size
@parallelize_with = with
@parallel_executor = build_parallel_executor
end
def start
parallelize if should_parallelize?
show_execution_info
parallel_executor.start if parallelized?
end
def <<(work)
parallel_executor << work if parallelized?
end
def shutdown
parallel_executor.shutdown if parallelized?
end
private
def build_parallel_executor
case parallelize_with
when :processes
Testing::Parallelization.new(size)
when :threads
ActiveSupport::TestCase.lock_threads = false if defined?(ActiveSupport::TestCase.lock_threads)
Minitest::Parallel::Executor.new(size)
else
raise ArgumentError, "#{parallelize_with} is not a supported parallelization executor."
end
end
def parallelize
@parallelized = true
Minitest::Test.parallelize_me!
end
def parallelized?
@parallelized
end
def should_parallelize?
ENV["PARALLEL_WORKERS"] || tests_count > ActiveSupport.test_parallelization_minimum_number_of_tests
end
def tests_count
@tests_count ||= Minitest::Runnable.runnables.sum { |runnable| runnable.runnable_methods.size }
end
def show_execution_info
puts execution_info
end
def execution_info
if should_parallelize?
"Running #{tests_count} tests in parallel using #{parallel_executor.size} #{parallelize_with}"
else
"Running #{tests_count} tests in a single process (parallelization threshold is #{ActiveSupport.test_parallelization_minimum_number_of_tests})"
end
end
end
end
end

View File

@ -569,6 +569,16 @@ end
NOTE: With disabled transactional tests, you have to clean up any data tests
create as changes are not automatically rolled back after the test completes.
### Threshold to parallelize tests
Running tests in parallel adds an overhead in terms of database setup and
fixture loading. Because of this, Rails won't parallelize executions that involve
fewer than 50 tests. You can configure this threshold in your `test.rb`:
```ruby
config.active_support.test_parallelization_minimum_number_of_tests = 100
```
The Test Database
-----------------

View File

@ -2421,6 +2421,20 @@ module ApplicationTests
assert_equal OpenSSL::Digest::SHA256, ActiveSupport::KeyGenerator.hash_digest_class
end
test "ActiveSupport.test_parallelization_minimum_number_of_tests can be configured via config.active_support.test_parallelization_minimum_number_of_tests" do
remove_from_config '.*config\.load_defaults.*\n'
app_file "config/environments/test.rb", <<-RUBY
Rails.application.configure do
config.active_support.test_parallelization_minimum_number_of_tests = 1234
end
RUBY
app "test"
assert_equal 1234, ActiveSupport.test_parallelization_minimum_number_of_tests
end
test "custom serializers should be able to set via config.active_job.custom_serializers in an initializer" do
class ::DummySerializer < ActiveJob::Serializers::ObjectSerializer; end

View File

@ -567,9 +567,29 @@ module ApplicationTests
output = run_test_command(file_name)
assert_match %r{Finished in.*\n2 runs, 2 assertions}, output
assert_match %r{Running \d+ tests in parallel using \d+ processes}, output
assert_no_match "create_table(:users)", output
end
def test_avoid_paralleling_when_number_of_tests_if_below_threshold
exercise_parallelization_regardless_of_machine_core_count(with: :processes, threshold: 100)
file_name = create_parallel_processes_test_file
app_file "db/schema.rb", <<-RUBY
ActiveRecord::Schema.define(version: 1) do
create_table :users do |t|
t.string :name
end
end
RUBY
output = run_test_command(file_name)
assert_match %r{Running \d+ tests in a single process}, output
assert_no_match %r{Running \d+ tests in parallel using \d+ processes}, output
end
def test_parallel_is_disabled_when_single_file_is_run
exercise_parallelization_regardless_of_machine_core_count(with: :processes, force: false)
@ -1140,12 +1160,14 @@ module ApplicationTests
RUBY
end
def exercise_parallelization_regardless_of_machine_core_count(with:, force: true)
def exercise_parallelization_regardless_of_machine_core_count(with:, force: true, threshold: 0)
file_content = ERB.new(<<-ERB, trim_mode: "-").result_with_hash(with: with.to_s, force: force)
ENV["RAILS_ENV"] ||= "test"
require_relative "../config/environment"
require "rails/test_help"
ActiveSupport.test_parallelization_minimum_number_of_tests = #{threshold}
class ActiveSupport::TestCase
<%- if force -%>
# Force parallelization, even with single files