Parallelize tests only when overhead is justified

Parallelizing tests has a cost in terms of database setup and fixture loading. This change makes Rails disable parallelization when the number of tests is below a configurable threshold. When running tests in parallel each process gets its own database instance. On each execution, each process will update each database schema (if needed) and load all the fixtures. This can be very expensive for non trivial datasets. As an example, for HEY, when running a single file with 18 tests, running tests in parallel in my box adds an overhead of 13 seconds versus not parallelizing them. Of course parallelizing is totally worthy when there are many tests to run, but not when running just a few tests. The threshold is configurable via config.active_support.test_parallelization_minimum_number_of_tests, which is 30 50 by default. This also adds some tracing to know how tests are being executed: When in parallel: ``` Running 2829 tests in parallel in 8 processes ``` When not in parallel: ``` Running 15 tests in a single process (parallelization threshold is 30) ```
2022-11-09 12:12:34 -05:00 · 2021-07-12 17:37:52 +02:00 · 2021-07-12 17:37:52 +02:00 · ecc5afed30
commit ecc5afed30
parent d364cfb34e
8 changed files with 140 additions and 15 deletions
--- a/activesupport/CHANGELOG.md
+++ b/activesupport/CHANGELOG.md
@ -1,3 +1,17 @@
+*   Parallelize tests only when overhead is justified by the number of them
+
+    Running tests in parallel adds overhead in terms of database
+    setup and fixture loading. Now, Rails will only parallelize test executions when
+    there are enough tests to make it worth it.
+
+    This threshold is 50 by default, and is configurable via:
+
+    ```ruby
+    config.active_support.test_parallelization_minimum_number_of_tests = 100
+    ```
+
+    *Jorge Manrubia*
+
 *   OpenSSL constants are now used for Digest computations.

    *Dirkjan Bussink*
--- a/activesupport/lib/active_support.rb
+++ b/activesupport/lib/active_support.rb
@ -88,6 +88,7 @@ module ActiveSupport

  cattr_accessor :test_order # :nodoc:
  cattr_accessor :test_parallelization_disabled, default: false # :nodoc:
+  cattr_accessor :test_parallelization_minimum_number_of_tests, default: 50 # :nodoc:

  def self.disable_test_parallelization!
    self.test_parallelization_disabled = true unless ENV["PARALLEL_WORKERS"]
--- a/activesupport/lib/active_support/test_case.rb
+++ b/activesupport/lib/active_support/test_case.rb
@ -12,6 +12,7 @@ require "active_support/testing/constant_lookup"
 require "active_support/testing/time_helpers"
 require "active_support/testing/file_fixtures"
 require "active_support/testing/parallelization"
+require "active_support/testing/parallelize_executor"
 require "concurrent/utility/processor_counter"

 module ActiveSupport
@ -77,20 +78,7 @@ module ActiveSupport

        return if workers <= 1 || ActiveSupport.test_parallelization_disabled

-        executor = case with
-                   when :processes
-                     Testing::Parallelization.new(workers)
-                   when :threads
-                     Minitest::Parallel::Executor.new(workers)
-                   else
-                     raise ArgumentError, "#{with} is not a supported parallelization executor."
-        end
-
-        self.lock_threads = false if defined?(self.lock_threads) && with == :threads
-
-        Minitest.parallel_executor = executor
-
-        parallelize_me!
+        Minitest.parallel_executor = ActiveSupport::Testing::ParallelizeExecutor.new(size: workers, with: with)
      end

      # Set up hook for parallel testing. This can be used if you have multiple
--- a/activesupport/lib/active_support/testing/parallelization.rb
+++ b/activesupport/lib/active_support/testing/parallelization.rb
@ -42,6 +42,10 @@ module ActiveSupport
        @queue_server << work
      end

+      def size
+        @worker_count
+      end
+
      def shutdown
        @queue_server.shutdown
        @worker_pool.each { |pid| Process.waitpid pid }
--- a/activesupport/lib/active_support/testing/parallelize_executor.rb
+++ b/activesupport/lib/active_support/testing/parallelize_executor.rb
@ -0,0 +1,72 @@
+# frozen_string_literal: true
+
+module ActiveSupport
+  module Testing
+    class ParallelizeExecutor # :nodoc:
+      attr_reader :size, :parallelize_with, :parallel_executor
+
+      def initialize(size:, with:)
+        @size = size
+        @parallelize_with = with
+        @parallel_executor = build_parallel_executor
+      end
+
+      def start
+        parallelize if should_parallelize?
+        show_execution_info
+
+        parallel_executor.start if parallelized?
+      end
+
+      def <<(work)
+        parallel_executor << work if parallelized?
+      end
+
+      def shutdown
+        parallel_executor.shutdown if parallelized?
+      end
+
+      private
+        def build_parallel_executor
+          case parallelize_with
+          when :processes
+            Testing::Parallelization.new(size)
+          when :threads
+            ActiveSupport::TestCase.lock_threads = false if defined?(ActiveSupport::TestCase.lock_threads)
+            Minitest::Parallel::Executor.new(size)
+          else
+            raise ArgumentError, "#{parallelize_with} is not a supported parallelization executor."
+          end
+        end
+
+        def parallelize
+          @parallelized = true
+          Minitest::Test.parallelize_me!
+        end
+
+        def parallelized?
+          @parallelized
+        end
+
+        def should_parallelize?
+          ENV["PARALLEL_WORKERS"] || tests_count > ActiveSupport.test_parallelization_minimum_number_of_tests
+        end
+
+        def tests_count
+          @tests_count ||= Minitest::Runnable.runnables.sum { |runnable| runnable.runnable_methods.size }
+        end
+
+        def show_execution_info
+          puts execution_info
+        end
+
+        def execution_info
+          if should_parallelize?
+            "Running #{tests_count} tests in parallel using #{parallel_executor.size} #{parallelize_with}"
+          else
+            "Running #{tests_count} tests in a single process (parallelization threshold is #{ActiveSupport.test_parallelization_minimum_number_of_tests})"
+          end
+        end
+    end
+  end
+end
--- a/guides/source/testing.md
+++ b/guides/source/testing.md
@ -569,6 +569,16 @@ end
 NOTE: With disabled transactional tests, you have to clean up any data tests
 create as changes are not automatically rolled back after the test completes.

+### Threshold to parallelize tests
+
+Running tests in parallel adds an overhead in terms of database setup and
+fixture loading. Because of this, Rails won't parallelize executions that involve
+fewer than 50 tests. You can configure this threshold in your `test.rb`:
+
+```ruby
+config.active_support.test_parallelization_minimum_number_of_tests = 100
+```
+
 The Test Database
 -----------------

--- a/railties/test/application/configuration_test.rb
+++ b/railties/test/application/configuration_test.rb
@ -2421,6 +2421,20 @@ module ApplicationTests
      assert_equal OpenSSL::Digest::SHA256, ActiveSupport::KeyGenerator.hash_digest_class
    end

+    test "ActiveSupport.test_parallelization_minimum_number_of_tests can be configured via config.active_support.test_parallelization_minimum_number_of_tests" do
+      remove_from_config '.*config\.load_defaults.*\n'
+
+      app_file "config/environments/test.rb", <<-RUBY
+        Rails.application.configure do
+          config.active_support.test_parallelization_minimum_number_of_tests = 1234
+        end
+      RUBY
+
+      app "test"
+
+      assert_equal 1234, ActiveSupport.test_parallelization_minimum_number_of_tests
+    end
+
    test "custom serializers should be able to set via config.active_job.custom_serializers in an initializer" do
      class ::DummySerializer < ActiveJob::Serializers::ObjectSerializer; end

--- a/railties/test/application/test_runner_test.rb
+++ b/railties/test/application/test_runner_test.rb
@ -567,9 +567,29 @@ module ApplicationTests
      output = run_test_command(file_name)

      assert_match %r{Finished in.*\n2 runs, 2 assertions}, output
+      assert_match %r{Running \d+ tests in parallel using \d+ processes}, output
      assert_no_match "create_table(:users)", output
    end

+    def test_avoid_paralleling_when_number_of_tests_if_below_threshold
+      exercise_parallelization_regardless_of_machine_core_count(with: :processes, threshold: 100)
+
+      file_name = create_parallel_processes_test_file
+
+      app_file "db/schema.rb", <<-RUBY
+        ActiveRecord::Schema.define(version: 1) do
+          create_table :users do |t|
+            t.string :name
+          end
+        end
+      RUBY
+
+      output = run_test_command(file_name)
+
+      assert_match %r{Running \d+ tests in a single process}, output
+      assert_no_match %r{Running \d+ tests in parallel using \d+ processes}, output
+    end
+
    def test_parallel_is_disabled_when_single_file_is_run
      exercise_parallelization_regardless_of_machine_core_count(with: :processes, force: false)

@ -1140,12 +1160,14 @@ module ApplicationTests
        RUBY
      end

-      def exercise_parallelization_regardless_of_machine_core_count(with:, force: true)
+      def exercise_parallelization_regardless_of_machine_core_count(with:, force: true, threshold: 0)
        file_content = ERB.new(<<-ERB, trim_mode: "-").result_with_hash(with: with.to_s, force: force)
          ENV["RAILS_ENV"] ||= "test"
          require_relative "../config/environment"
          require "rails/test_help"

+          ActiveSupport.test_parallelization_minimum_number_of_tests = #{threshold}
+
          class ActiveSupport::TestCase
            <%- if force -%>
            # Force parallelization, even with single files