Lower memory allocations

This prevents repeated hash creation during the calculation of `object` and `object_changes`.

A real-life case, measured with the help of the `memory_profiler` gem:
1. It saves ~35 MB per bulk of 100 typical objects such as `User`;
2. It saves ~875 MB per Sidekiq process full of bulk jobs like (1),
   under the default concurrency of 25.
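
For context, a rough sketch of how such a saving can be measured with the `memory_profiler` gem mentioned above. This is an editorial illustration, not part of the commit: the `User` record, the attribute change, and the 100-iteration loop are assumptions, and it relies on the `build_version_on_update` method introduced in the diff below.

```ruby
require "memory_profiler"

# Hypothetical setup: any PaperTrail-enabled model works; `User` and the
# attribute change are placeholders.
user = User.first
user.name = "New name"

report = MemoryProfiler.report do
  100.times do
    user.paper_trail.build_version_on_update(
      force: false, in_after_callback: false, is_touch: false
    )
  end
end

puts "allocated: #{report.total_allocated_memsize / 1024} KB"
```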
Sergey Tokarenko 2019-03-08 08:17:31 +03:00 committed by Jared Beck
parent 43a0634589
commit cf17bc47a6
8 changed files with 200 additions and 33 deletions


@@ -18,6 +18,13 @@ recommendations of [keepachangelog.com](http://keepachangelog.com/).
 - [#1184](https://github.com/paper-trail-gem/paper_trail/pull/1184) -
   No need to calculate previous values of skipped attributes
+- [#1188](https://github.com/paper-trail-gem/paper_trail/pull/1188) -
+  Reduced memory allocations while building each Version object, which helps
+  considerably with heavy bulk processing. In addition, we advise using the
+  `json[b]` database types for the `object` and `object_changes` Version
+  columns to get the best possible RAM footprint (see the migration sketch
+  below).
+
 ## 10.2.0 (2019-01-31)

 ### Breaking Changes
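
To illustrate the `json[b]` advice in the entry above, here is a hedged migration sketch for a fresh PostgreSQL setup. It is not part of the commit; the column layout follows PaperTrail's usual install migration, and converting an existing YAML-serialized `versions` table would additionally need a data migration, which is out of scope here.

```ruby
# Sketch only: a `versions` table whose `object` and `object_changes`
# columns use jsonb instead of the default YAML-in-text columns.
class CreateVersions < ActiveRecord::Migration[5.2]
  def change
    create_table :versions do |t|
      t.string   :item_type, null: false
      t.bigint   :item_id,   null: false
      t.string   :event,     null: false
      t.string   :whodunnit
      t.jsonb    :object
      t.jsonb    :object_changes
      t.datetime :created_at
    end
    add_index :versions, %i[item_type item_id]
  end
end
```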


@@ -60,15 +60,28 @@ module PaperTrail
       # @api private
       def nonskipped_attributes_before_change(is_touch)
-        record_attributes = @record.attributes.except(*@record.paper_trail_options[:skip])
+        cache_changed_attributes do
+          record_attributes = @record.attributes.except(*@record.paper_trail_options[:skip])

-        Hash[record_attributes.map do |k, v|
-          if @record.class.column_names.include?(k)
-            [k, attribute_in_previous_version(k, is_touch)]
-          else
-            [k, v]
+          record_attributes.each_key do |k|
+            if @record.class.column_names.include?(k)
+              record_attributes[k] = attribute_in_previous_version(k, is_touch)
+            end
           end
-        end]
+        end
       end

+      # Rails 5.1 changed the API of `ActiveRecord::Dirty`.
+      # @api private
+      def cache_changed_attributes
+        if RAILS_GTE_5_1
+          # Everything works fine as it is.
+          yield
+        else
+          # Each call to `changed_attributes` produces a huge memory allocation,
+          # so use the generic ActiveRecord workaround for it.
+          @record.send(:cache_changed_attributes) { yield }
+        end
+      end
+
       # Rails 5.1 changed the API of `ActiveRecord::Dirty`. See
@@ -110,7 +123,8 @@ module PaperTrail
       # @api private
       def changed_in_latest_version
-        changes_in_latest_version.keys
+        # Memoized to reduce memory usage
+        @changed_in_latest_version ||= changes_in_latest_version.keys
       end

       # Rails 5.1 changed the API of `ActiveRecord::Dirty`. See
@@ -118,10 +132,13 @@
       #
       # @api private
       def changes_in_latest_version
-        if @in_after_callback && RAILS_GTE_5_1
-          @record.saved_changes
-        else
-          @record.changes
+        # Memoized to reduce memory usage
+        @changes_in_latest_version ||= begin
+          if @in_after_callback && RAILS_GTE_5_1
+            @record.saved_changes
+          else
+            @record.changes
+          end
         end
       end
@@ -202,16 +219,19 @@ module PaperTrail
       # @api private
       def notably_changed
-        only = @record.paper_trail_options[:only].dup
-        # Remove Hash arguments and then evaluate whether the attributes (the
-        # keys of the hash) should also get pushed into the collection.
-        only.delete_if do |obj|
-          obj.is_a?(Hash) &&
-            obj.each { |attr, condition|
-              only << attr if condition.respond_to?(:call) && condition.call(@record)
-            }
+        # Memoized to reduce memory usage
+        @notably_changed ||= begin
+          only = @record.paper_trail_options[:only].dup
+          # Remove Hash arguments and then evaluate whether the attributes (the
+          # keys of the hash) should also get pushed into the collection.
+          only.delete_if do |obj|
+            obj.is_a?(Hash) &&
+              obj.each { |attr, condition|
+                only << attr if condition.respond_to?(:call) && condition.call(@record)
+              }
+          end
+          only.empty? ? changed_and_not_ignored : (changed_and_not_ignored & only)
         end
-        only.empty? ? changed_and_not_ignored : (changed_and_not_ignored & only)
       end

       # Returns hash of attributes (with appropriate attributes serialized),
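
An editorial aside on the first hunk above (not part of the diff): the old code rebuilt the attributes hash via `Hash[...map...]`, allocating an intermediate array of pairs plus a second hash for every version, whereas the new code overwrites values in the hash it already holds. A minimal standalone sketch, with a hypothetical `previous_value` helper standing in for `attribute_in_previous_version`:

```ruby
# Hypothetical stand-in for attribute_in_previous_version.
def previous_value(key)
  "previous #{key}"
end

attrs = { "name" => "Warble", "an_integer" => 42 }

# Old approach: `map` allocates an intermediate array of [key, value] pairs,
# and `Hash[]` allocates a second, brand-new hash.
rebuilt = Hash[attrs.map { |k, _v| [k, previous_value(k)] }]

# New approach: values are written back into the hash we already have,
# so no intermediate array or extra hash is allocated.
attrs.each_key { |k| attrs[k] = previous_value(k) }

p rebuilt == attrs # => true
```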


@@ -13,6 +13,7 @@ module PaperTrail
       # @api private
       def data
         data = {
+          item: @record,
           event: @record.paper_trail_event || "create",
           whodunnit: PaperTrail.request.whodunnit
         }


@@ -25,6 +25,7 @@ module PaperTrail
       # @api private
       def data
         data = {
+          item: @record,
           event: @record.paper_trail_event || "update",
           whodunnit: PaperTrail.request.whodunnit
         }


@@ -67,14 +67,26 @@ module PaperTrail
     def record_create
       return unless enabled?
-      event = Events::Create.new(@record, true)
+
+      build_version_on_create(in_after_callback: true).tap do |version|
+        version.save!
+        # One more Version object has been created, so we need to invalidate
+        # the `versions` association cache; otherwise the new version
+        # can stay invisible within that association.
+        versions.reset
+      end
+    end
+
+    # @api private
+    def build_version_on_create(in_after_callback:)
+      event = Events::Create.new(@record, in_after_callback)

       # Merge data from `Event` with data from PT-AT. We no longer use
       # `data_for_create` but PT-AT still does.
-      data = event.data.merge(data_for_create)
+      data = event.data.merge!(data_for_create)

-      versions_assoc = @record.send(@record.class.versions_association_name)
-      versions_assoc.create!(data)
+      # Pure `version_class.new` reduces memory usage compared to `versions_assoc.build`.
+      @record.class.paper_trail.version_class.new(data)
     end

     # PT-AT extends this method to add its transaction id.
@@ -119,20 +131,36 @@ module PaperTrail
     # paper_trail-association_tracking
     def record_update(force:, in_after_callback:, is_touch:)
       return unless enabled?
+
+      version = build_version_on_update(
+        force: force,
+        in_after_callback: in_after_callback,
+        is_touch: is_touch
+      )
+      return unless version
+
+      if version.save
+        # One more Version object has been created, so we need to invalidate
+        # the `versions` association cache; otherwise the new version
+        # can stay invisible within that association.
+        versions.reset
+        version
+      else
+        log_version_errors(version, :update)
+      end
+    end
+
+    # @api private
+    def build_version_on_update(force:, in_after_callback:, is_touch:)
       event = Events::Update.new(@record, in_after_callback, is_touch, nil)
       return unless force || event.changed_notably?

       # Merge data from `Event` with data from PT-AT. We no longer use
       # `data_for_update` but PT-AT still does.
-      data = event.data.merge(data_for_update)
+      data = event.data.merge!(data_for_update)

-      versions_assoc = @record.send(@record.class.versions_association_name)
-      version = versions_assoc.create(data)
-      if version.errors.any?
-        log_version_errors(version, :update)
-      else
-        version
-      end
+      # Pure `version_class.new` reduces memory usage compared to `versions_assoc.build`.
+      @record.class.paper_trail.version_class.new(data)
     end

     # PT-AT extends this method to add its transaction id.
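
For context (an editorial sketch, not part of the diff): how the new builder pairs with the association-cache reset when called directly, much like the specs below do. The `Widget` model and attribute values are borrowed from the spec file.

```ruby
widget = Widget.create!(name: "Warble")
widget.name = "Dostoyevsky"

# The version is instantiated with `version_class.new`, bypassing the
# `versions` association, so after saving it we reset the cached association
# ourselves to make the new row visible through `widget.versions`.
version = widget.paper_trail.build_version_on_update(
  force: false, in_after_callback: false, is_touch: false
)
if version&.save
  widget.versions.reset
end
```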


@@ -35,6 +35,7 @@ has been destroyed.
   s.add_development_dependency "byebug", "~> 10.0"
   s.add_development_dependency "ffaker", "~> 2.8"
   s.add_development_dependency "generator_spec", "~> 0.9.4"
+  s.add_development_dependency "memory_profiler", "~> 0.9.12"
   s.add_development_dependency "mysql2", "~> 0.5.2"
   s.add_development_dependency "paper_trail-association_tracking", "~> 2.0.0"
   s.add_development_dependency "pg", "~> 1.0"


@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require "spec_helper"
+require "support/performance_helpers"

 RSpec.describe(::PaperTrail, versioning: true) do
   context "a new record" do
@@ -868,4 +869,74 @@ RSpec.describe(::PaperTrail, versioning: true) do
       expect(widget.versions.empty?).to(eq(true))
     end
   end
+
+  context "Memory allocation of" do
+    let(:widget) do
+      Widget.new(
+        name: "Warble",
+        a_text: "The quick brown fox",
+        an_integer: 42,
+        a_float: 153.01,
+        a_decimal: 2.71828,
+        a_boolean: true
+      )
+    end
+
+    before do
+      # JSON columns for the `object` & `object_changes` attributes are the most
+      # efficient option: they save even more RAM and skip YAML serialization
+      # entirely.
+      allow(PaperTrail::Version).to receive(:object_changes_col_is_json?).and_return(true)
+      allow(PaperTrail::Version).to receive(:object_col_is_json?).and_return(true)
+
+      # Force the loading of all lazy things, such as class definitions,
+      # so that the benchmark measures only the version building itself.
+      version_building.call
+    end
+
+    describe "#build_version_on_create" do
+      let(:version_building) do
+        -> { widget.paper_trail.build_version_on_create(in_after_callback: false) }
+      end
+
+      it "is frugal enough" do
+        # Before this optimization the example allocated about 95 KB.
+        # At the time of this commit the test passes with an assertion of 17 KB.
+        # Assert 20 KB to avoid flaky failures.
+        expect(&version_building).to allocate_less_than(20).kilobytes
+      end
+    end
+
+    describe "#build_version_on_update" do
+      let(:widget) do
+        super().tap do |w|
+          w.save!
+          w.attributes = {
+            name: "Dostoyevsky",
+            a_text: "The slow yellow mouse",
+            an_integer: 84,
+            a_float: 306.02,
+            a_decimal: 5.43656,
+            a_boolean: false
+          }
+        end
+      end
+
+      let(:version_building) do
+        lambda do
+          widget.paper_trail.build_version_on_update(
+            force: false,
+            in_after_callback: false,
+            is_touch: false
+          )
+        end
+      end
+
+      it "is frugal enough" do
+        # Before this optimization the example allocated about 144 KB.
+        # At the time of this commit the test passes with an assertion of 27 KB.
+        # Assert 35 KB to avoid flaky failures.
+        expect(&version_building).to allocate_less_than(35).kilobytes
+      end
+    end
+  end
 end


@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+require "memory_profiler"
+
+RSpec::Matchers.define :allocate_less_than do |expected|
+  supports_block_expectations
+
+  chain :bytes do
+    @scale = :bs
+  end
+
+  chain :kilobytes do
+    @scale = :kbs
+  end
+
+  chain :and_print_report do
+    @report = true
+  end
+
+  match do |actual|
+    @scale ||= :bs
+    benchmark = MemoryProfiler.report(ignore_files: /rspec/) { actual.call }
+
+    if @report
+      benchmark.pretty_print(detailed_report: true, scale_bytes: true)
+    end
+
+    @allocated = benchmark.total_allocated_memsize
+    @allocated /= 1024 if @scale == :kbs
+    @allocated <= expected
+  end
+
+  failure_message do
+    "expected that the example would allocate less than #{expected}#{@scale},"\
+      " but it allocated #{@allocated}#{@scale}"
+  end
+end
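
Usage note (illustrative, not part of the new file): the matcher defaults to bytes, the `bytes`/`kilobytes` chains pick the unit, and `and_print_report` also dumps the full MemoryProfiler breakdown. The `Widget` example and the 50 KB threshold are arbitrary choices for the sake of the sketch.

```ruby
# Inside an RSpec example:
expect { Widget.new(name: "Warble") }.to allocate_less_than(50).kilobytes

# Same assertion, but also print the detailed MemoryProfiler report.
expect { Widget.new(name: "Warble") }.to allocate_less_than(50).kilobytes.and_print_report
```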