From cf17bc47a64b58ec63c3b59dd633c600989a2b55 Mon Sep 17 00:00:00 2001 From: Sergey Tokarenko Date: Fri, 8 Mar 2019 08:17:31 +0300 Subject: [PATCH] Lower the memory allocations It prevents the repeating hashes creation during the calculation of `object` and `object_changes`. The real-life case, discovered with a help of `memory_profiler` gem: 1. It saves `~35Mb` per bulk of 100 typical objects like `User`; 2. It saves `~875Mb` per Sidekiq process full of bulk jobs like P1, under default concurrency 25. --- CHANGELOG.md | 7 +++ lib/paper_trail/events/base.rb | 62 ++++++++++++++++--------- lib/paper_trail/events/create.rb | 1 + lib/paper_trail/events/update.rb | 1 + lib/paper_trail/record_trail.rb | 52 ++++++++++++++++----- paper_trail.gemspec | 1 + spec/paper_trail/model_spec.rb | 71 +++++++++++++++++++++++++++++ spec/support/performance_helpers.rb | 38 +++++++++++++++ 8 files changed, 200 insertions(+), 33 deletions(-) create mode 100644 spec/support/performance_helpers.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 46cc5f4a..215f1cba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,13 @@ recommendations of [keepachangelog.com](http://keepachangelog.com/). - [#1184](https://github.com/paper-trail-gem/paper_trail/pull/1184) - No need to calculate previous values of skipped attributes +- [#1188](https://github.com/paper-trail-gem/paper_trail/pull/1188) - + Optimized the memory allocations during the building of every particular + Version object. That can help a lot for heavy bulk processing. + In additional we advise to use `json[b]` DB types for `object` + and `object_changes` Version columns, in order to reach best possible + RAM performance. + ## 10.2.0 (2019-01-31) ### Breaking Changes diff --git a/lib/paper_trail/events/base.rb b/lib/paper_trail/events/base.rb index 0f7999d7..98df8140 100644 --- a/lib/paper_trail/events/base.rb +++ b/lib/paper_trail/events/base.rb @@ -60,15 +60,28 @@ module PaperTrail # @api private def nonskipped_attributes_before_change(is_touch) - record_attributes = @record.attributes.except(*@record.paper_trail_options[:skip]) + cache_changed_attributes do + record_attributes = @record.attributes.except(*@record.paper_trail_options[:skip]) - Hash[record_attributes.map do |k, v| - if @record.class.column_names.include?(k) - [k, attribute_in_previous_version(k, is_touch)] - else - [k, v] + record_attributes.each_key do |k| + if @record.class.column_names.include?(k) + record_attributes[k] = attribute_in_previous_version(k, is_touch) + end end - end] + end + end + + # Rails 5.1 changed the API of `ActiveRecord::Dirty`. + # @api private + def cache_changed_attributes + if RAILS_GTE_5_1 + # Everything works fine as it is + yield + else + # Any particular call to `changed_attributes` produces the huge memory allocation. + # Lets use the generic AR workaround for that. + @record.send(:cache_changed_attributes) { yield } + end end # Rails 5.1 changed the API of `ActiveRecord::Dirty`. See @@ -110,7 +123,8 @@ module PaperTrail # @api private def changed_in_latest_version - changes_in_latest_version.keys + # Memoized to reduce memory usage + @changed_in_latest_version ||= changes_in_latest_version.keys end # Rails 5.1 changed the API of `ActiveRecord::Dirty`. See @@ -118,10 +132,13 @@ module PaperTrail # # @api private def changes_in_latest_version - if @in_after_callback && RAILS_GTE_5_1 - @record.saved_changes - else - @record.changes + # Memoized to reduce memory usage + @changes_in_latest_version ||= begin + if @in_after_callback && RAILS_GTE_5_1 + @record.saved_changes + else + @record.changes + end end end @@ -202,16 +219,19 @@ module PaperTrail # @api private def notably_changed - only = @record.paper_trail_options[:only].dup - # Remove Hash arguments and then evaluate whether the attributes (the - # keys of the hash) should also get pushed into the collection. - only.delete_if do |obj| - obj.is_a?(Hash) && - obj.each { |attr, condition| - only << attr if condition.respond_to?(:call) && condition.call(@record) - } + # Memoized to reduce memory usage + @notably_changed ||= begin + only = @record.paper_trail_options[:only].dup + # Remove Hash arguments and then evaluate whether the attributes (the + # keys of the hash) should also get pushed into the collection. + only.delete_if do |obj| + obj.is_a?(Hash) && + obj.each { |attr, condition| + only << attr if condition.respond_to?(:call) && condition.call(@record) + } + end + only.empty? ? changed_and_not_ignored : (changed_and_not_ignored & only) end - only.empty? ? changed_and_not_ignored : (changed_and_not_ignored & only) end # Returns hash of attributes (with appropriate attributes serialized), diff --git a/lib/paper_trail/events/create.rb b/lib/paper_trail/events/create.rb index a04adda6..8042f378 100644 --- a/lib/paper_trail/events/create.rb +++ b/lib/paper_trail/events/create.rb @@ -13,6 +13,7 @@ module PaperTrail # @api private def data data = { + item: @record, event: @record.paper_trail_event || "create", whodunnit: PaperTrail.request.whodunnit } diff --git a/lib/paper_trail/events/update.rb b/lib/paper_trail/events/update.rb index 01b3ecf3..8516bc34 100644 --- a/lib/paper_trail/events/update.rb +++ b/lib/paper_trail/events/update.rb @@ -25,6 +25,7 @@ module PaperTrail # @api private def data data = { + item: @record, event: @record.paper_trail_event || "update", whodunnit: PaperTrail.request.whodunnit } diff --git a/lib/paper_trail/record_trail.rb b/lib/paper_trail/record_trail.rb index a2371310..e20e1f58 100644 --- a/lib/paper_trail/record_trail.rb +++ b/lib/paper_trail/record_trail.rb @@ -67,14 +67,26 @@ module PaperTrail def record_create return unless enabled? - event = Events::Create.new(@record, true) + + build_version_on_create(in_after_callback: true).tap do |version| + version.save! + # One more Version object has been created. Need to invalidate + # the `versions` association cache, otherwise the new version + # can stay invisible within that association + versions.reset + end + end + + # @api private + def build_version_on_create(in_after_callback:) + event = Events::Create.new(@record, in_after_callback) # Merge data from `Event` with data from PT-AT. We no longer use # `data_for_create` but PT-AT still does. - data = event.data.merge(data_for_create) + data = event.data.merge!(data_for_create) - versions_assoc = @record.send(@record.class.versions_association_name) - versions_assoc.create!(data) + # Pure `version_class.new` reduces memory usage compared to `versions_assoc.build` + @record.class.paper_trail.version_class.new(data) end # PT-AT extends this method to add its transaction id. @@ -119,20 +131,36 @@ module PaperTrail # paper_trail-association_tracking def record_update(force:, in_after_callback:, is_touch:) return unless enabled? + + version = build_version_on_update( + force: force, + in_after_callback: in_after_callback, + is_touch: is_touch + ) + return unless version + + if version.save + # One more Version object has been created. Need to invalidate + # the `versions` association cache, otherwise the new version + # can stay invisible within that association + versions.reset + version + else + log_version_errors(version, :update) + end + end + + # @api private + def build_version_on_update(force:, in_after_callback:, is_touch:) event = Events::Update.new(@record, in_after_callback, is_touch, nil) return unless force || event.changed_notably? # Merge data from `Event` with data from PT-AT. We no longer use # `data_for_update` but PT-AT still does. - data = event.data.merge(data_for_update) + data = event.data.merge!(data_for_update) - versions_assoc = @record.send(@record.class.versions_association_name) - version = versions_assoc.create(data) - if version.errors.any? - log_version_errors(version, :update) - else - version - end + # Pure `version_class.new` reduces memory usage compared to `versions_assoc.build` + @record.class.paper_trail.version_class.new(data) end # PT-AT extends this method to add its transaction id. diff --git a/paper_trail.gemspec b/paper_trail.gemspec index 53134339..9dc6c098 100644 --- a/paper_trail.gemspec +++ b/paper_trail.gemspec @@ -35,6 +35,7 @@ has been destroyed. s.add_development_dependency "byebug", "~> 10.0" s.add_development_dependency "ffaker", "~> 2.8" s.add_development_dependency "generator_spec", "~> 0.9.4" + s.add_development_dependency "memory_profiler", "~> 0.9.12" s.add_development_dependency "mysql2", "~> 0.5.2" s.add_development_dependency "paper_trail-association_tracking", "~> 2.0.0" s.add_development_dependency "pg", "~> 1.0" diff --git a/spec/paper_trail/model_spec.rb b/spec/paper_trail/model_spec.rb index 246e6503..cf757763 100644 --- a/spec/paper_trail/model_spec.rb +++ b/spec/paper_trail/model_spec.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "spec_helper" +require "support/performance_helpers" RSpec.describe(::PaperTrail, versioning: true) do context "a new record" do @@ -868,4 +869,74 @@ RSpec.describe(::PaperTrail, versioning: true) do expect(widget.versions.empty?).to(eq(true)) end end + + context "Memory allocation of" do + let(:widget) do + Widget.new( + name: "Warble", + a_text: "The quick brown fox", + an_integer: 42, + a_float: 153.01, + a_decimal: 2.71828, + a_boolean: true + ) + end + + before do + # Json fields for `object` & `object_changes` attributes is most efficient way + # to do the things - this way we will save even more RAM, as well as will skip + # the whole YAML serialization + allow(PaperTrail::Version).to receive(:object_changes_col_is_json?).and_return(true) + allow(PaperTrail::Version).to receive(:object_col_is_json?).and_return(true) + + # Force the loading of all lazy things like class definitions, + # in order to get the pure benchmark + version_building.call + end + + describe "#build_version_on_create" do + let(:version_building) do + -> { widget.paper_trail.build_version_on_create(in_after_callback: false) } + end + + it "is frugal enough" do + # Some time ago there was 95kbs.. + # At the time of commit the test passes with assertion on 17kbs. + # Lets assert 20kbs then, to avoid flaky fails. + expect(&version_building).to allocate_less_than(20).kilobytes + end + end + + describe "#build_version_on_update" do + let(:widget) do + super().tap do |w| + w.save! + w.attributes = { + name: "Dostoyevsky", + a_text: "The slow yellow mouse", + an_integer: 84, + a_float: 306.02, + a_decimal: 5.43656, + a_boolean: false + } + end + end + let(:version_building) do + lambda do + widget.paper_trail.build_version_on_update( + force: false, + in_after_callback: false, + is_touch: false + ) + end + end + + it "is frugal enough" do + # Some time ago there was 144kbs.. + # At the time of commit the test passes with assertion on 27kbs. + # Lets assert 35kbs then, to avoid flaky fails. + expect(&version_building).to allocate_less_than(35).kilobytes + end + end + end end diff --git a/spec/support/performance_helpers.rb b/spec/support/performance_helpers.rb new file mode 100644 index 00000000..077a984b --- /dev/null +++ b/spec/support/performance_helpers.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require "memory_profiler" + +RSpec::Matchers.define :allocate_less_than do |expected| + supports_block_expectations + + chain :bytes do + @scale = :bs + end + + chain :kilobytes do + @scale = :kbs + end + + chain :and_print_report do + @report = true + end + + match do |actual| + @scale ||= :bs + + benchmark = MemoryProfiler.report(ignore_files: /rspec/) { actual.call } + + if @report + benchmark.pretty_print(detailed_report: true, scale_bytes: true) + end + + @allocated = benchmark.total_allocated_memsize + @allocated /= 1024 if @scale == :kbs + @allocated <= expected + end + + failure_message do + "expected that example will allocate less than #{expected}#{@scale},"\ + " but allocated #{@allocated}#{@scale}" + end +end