Lower memory allocations

This prevents repeated hash creation during the calculation of `object` and `object_changes`.

A real-life case, measured with the help of the `memory_profiler` gem:
1. It saves ~35 MB per bulk of 100 typical objects such as `User`;
2. It saves ~875 MB per Sidekiq process full of bulk jobs like (1),
   under the default concurrency of 25.
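
For context, a rough sketch of how such a saving can be measured with the `memory_profiler` gem mentioned above. This is an editorial illustration, not part of the commit: the `User` record, the attribute change, and the 100-iteration loop are assumptions, and it relies on the `build_version_on_update` method introduced in the diff below.

```ruby
require "memory_profiler"

# Hypothetical setup: any PaperTrail-enabled model works; `User` and the
# attribute change are placeholders.
user = User.first
user.name = "New name"

report = MemoryProfiler.report do
  100.times do
    user.paper_trail.build_version_on_update(
      force: false, in_after_callback: false, is_touch: false
    )
  end
end

puts "allocated: #{report.total_allocated_memsize / 1024} KB"
```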
Sergey Tokarenko 2019-03-08 08:17:31 +03:00 committed by Jared Beck
parent 43a0634589
commit cf17bc47a6
8 changed files with 200 additions and 33 deletions


@@ -18,6 +18,13 @@ recommendations of [keepachangelog.com](http://keepachangelog.com/).
 - [#1184](https://github.com/paper-trail-gem/paper_trail/pull/1184) -
   No need to calculate previous values of skipped attributes
+- [#1188](https://github.com/paper-trail-gem/paper_trail/pull/1188) -
+  Reduced memory allocations while building each Version object, which helps
+  considerably with heavy bulk processing. In addition, we advise using the
+  `json[b]` database types for the `object` and `object_changes` Version
+  columns to get the best possible RAM footprint (see the migration sketch
+  below).
+
 ## 10.2.0 (2019-01-31)

 ### Breaking Changes
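
To illustrate the `json[b]` advice in the entry above, here is a hedged migration sketch for a fresh PostgreSQL setup. It is not part of the commit; the column layout follows PaperTrail's usual install migration, and converting an existing YAML-serialized `versions` table would additionally need a data migration, which is out of scope here.

```ruby
# Sketch only: a `versions` table whose `object` and `object_changes`
# columns use jsonb instead of the default YAML-in-text columns.
class CreateVersions < ActiveRecord::Migration[5.2]
  def change
    create_table :versions do |t|
      t.string   :item_type, null: false
      t.bigint   :item_id,   null: false
      t.string   :event,     null: false
      t.string   :whodunnit
      t.jsonb    :object
      t.jsonb    :object_changes
      t.datetime :created_at
    end
    add_index :versions, %i[item_type item_id]
  end
end
```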


@@ -60,15 +60,28 @@ module PaperTrail
       # @api private
       def nonskipped_attributes_before_change(is_touch)
-        record_attributes = @record.attributes.except(*@record.paper_trail_options[:skip])
+        cache_changed_attributes do
+          record_attributes = @record.attributes.except(*@record.paper_trail_options[:skip])

-        Hash[record_attributes.map do |k, v|
-          if @record.class.column_names.include?(k)
-            [k, attribute_in_previous_version(k, is_touch)]
-          else
-            [k, v]
+          record_attributes.each_key do |k|
+            if @record.class.column_names.include?(k)
+              record_attributes[k] = attribute_in_previous_version(k, is_touch)
+            end
           end
-        end]
+        end
       end

+      # Rails 5.1 changed the API of `ActiveRecord::Dirty`.
+      # @api private
+      def cache_changed_attributes
+        if RAILS_GTE_5_1
+          # Everything works fine as it is.
+          yield
+        else
+          # Each call to `changed_attributes` produces a huge memory allocation,
+          # so use the generic ActiveRecord workaround for it.
+          @record.send(:cache_changed_attributes) { yield }
+        end
+      end
+
       # Rails 5.1 changed the API of `ActiveRecord::Dirty`. See
@@ -110,7 +123,8 @@ module PaperTrail
       # @api private
       def changed_in_latest_version
-        changes_in_latest_version.keys
+        # Memoized to reduce memory usage
+        @changed_in_latest_version ||= changes_in_latest_version.keys
       end

       # Rails 5.1 changed the API of `ActiveRecord::Dirty`. See
@@ -118,10 +132,13 @@
       #
       # @api private
       def changes_in_latest_version
-        if @in_after_callback && RAILS_GTE_5_1
-          @record.saved_changes
-        else
-          @record.changes
+        # Memoized to reduce memory usage
+        @changes_in_latest_version ||= begin
+          if @in_after_callback && RAILS_GTE_5_1
+            @record.saved_changes
+          else
+            @record.changes
+          end
         end
       end
@@ -202,16 +219,19 @@ module PaperTrail
       # @api private
       def notably_changed
-        only = @record.paper_trail_options[:only].dup
-        # Remove Hash arguments and then evaluate whether the attributes (the
-        # keys of the hash) should also get pushed into the collection.
-        only.delete_if do |obj|
-          obj.is_a?(Hash) &&
-            obj.each { |attr, condition|
-              only << attr if condition.respond_to?(:call) && condition.call(@record)
-            }
+        # Memoized to reduce memory usage
+        @notably_changed ||= begin
+          only = @record.paper_trail_options[:only].dup
+          # Remove Hash arguments and then evaluate whether the attributes (the
+          # keys of the hash) should also get pushed into the collection.
+          only.delete_if do |obj|
+            obj.is_a?(Hash) &&
+              obj.each { |attr, condition|
+                only << attr if condition.respond_to?(:call) && condition.call(@record)
+              }
+          end
+          only.empty? ? changed_and_not_ignored : (changed_and_not_ignored & only)
         end
-        only.empty? ? changed_and_not_ignored : (changed_and_not_ignored & only)
       end

       # Returns hash of attributes (with appropriate attributes serialized),
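
An editorial aside on the first hunk above (not part of the diff): the old code rebuilt the attributes hash via `Hash[...map...]`, allocating an intermediate array of pairs plus a second hash for every version, whereas the new code overwrites values in the hash it already holds. A minimal standalone sketch, with a hypothetical `previous_value` helper standing in for `attribute_in_previous_version`:

```ruby
# Hypothetical stand-in for attribute_in_previous_version.
def previous_value(key)
  "previous #{key}"
end

attrs = { "name" => "Warble", "an_integer" => 42 }

# Old approach: `map` allocates an intermediate array of [key, value] pairs,
# and `Hash[]` allocates a second, brand-new hash.
rebuilt = Hash[attrs.map { |k, _v| [k, previous_value(k)] }]

# New approach: values are written back into the hash we already have,
# so no intermediate array or extra hash is allocated.
attrs.each_key { |k| attrs[k] = previous_value(k) }

p rebuilt == attrs # => true
```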


@@ -13,6 +13,7 @@ module PaperTrail
       # @api private
       def data
         data = {
+          item: @record,
           event: @record.paper_trail_event || "create",
           whodunnit: PaperTrail.request.whodunnit
         }


@@ -25,6 +25,7 @@ module PaperTrail
       # @api private
       def data
         data = {
+          item: @record,
           event: @record.paper_trail_event || "update",
           whodunnit: PaperTrail.request.whodunnit
         }


@@ -67,14 +67,26 @@ module PaperTrail
     def record_create
       return unless enabled?
-      event = Events::Create.new(@record, true)
+
+      build_version_on_create(in_after_callback: true).tap do |version|
+        version.save!
+        # One more Version object has been created, so we need to invalidate
+        # the `versions` association cache; otherwise the new version
+        # can stay invisible within that association.
+        versions.reset
+      end
+    end
+
+    # @api private
+    def build_version_on_create(in_after_callback:)
+      event = Events::Create.new(@record, in_after_callback)

       # Merge data from `Event` with data from PT-AT. We no longer use
       # `data_for_create` but PT-AT still does.
-      data = event.data.merge(data_for_create)
+      data = event.data.merge!(data_for_create)

-      versions_assoc = @record.send(@record.class.versions_association_name)
-      versions_assoc.create!(data)
+      # Pure `version_class.new` reduces memory usage compared to `versions_assoc.build`.
+      @record.class.paper_trail.version_class.new(data)
     end

     # PT-AT extends this method to add its transaction id.
@@ -119,20 +131,36 @@ module PaperTrail
     # paper_trail-association_tracking
     def record_update(force:, in_after_callback:, is_touch:)
       return unless enabled?
+
+      version = build_version_on_update(
+        force: force,
+        in_after_callback: in_after_callback,
+        is_touch: is_touch
+      )
+      return unless version
+
+      if version.save
+        # One more Version object has been created, so we need to invalidate
+        # the `versions` association cache; otherwise the new version
+        # can stay invisible within that association.
+        versions.reset
+        version
+      else
+        log_version_errors(version, :update)
+      end
+    end
+
+    # @api private
+    def build_version_on_update(force:, in_after_callback:, is_touch:)
       event = Events::Update.new(@record, in_after_callback, is_touch, nil)
       return unless force || event.changed_notably?

       # Merge data from `Event` with data from PT-AT. We no longer use
       # `data_for_update` but PT-AT still does.
-      data = event.data.merge(data_for_update)
+      data = event.data.merge!(data_for_update)

-      versions_assoc = @record.send(@record.class.versions_association_name)
-      version = versions_assoc.create(data)
-      if version.errors.any?
-        log_version_errors(version, :update)
-      else
-        version
-      end
+      # Pure `version_class.new` reduces memory usage compared to `versions_assoc.build`.
+      @record.class.paper_trail.version_class.new(data)
     end

     # PT-AT extends this method to add its transaction id.
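
For context (an editorial sketch, not part of the diff): how the new builder pairs with the association-cache reset when called directly, much like the specs below do. The `Widget` model and attribute values are borrowed from the spec file.

```ruby
widget = Widget.create!(name: "Warble")
widget.name = "Dostoyevsky"

# The version is instantiated with `version_class.new`, bypassing the
# `versions` association, so after saving it we reset the cached association
# ourselves to make the new row visible through `widget.versions`.
version = widget.paper_trail.build_version_on_update(
  force: false, in_after_callback: false, is_touch: false
)
if version&.save
  widget.versions.reset
end
```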


@@ -35,6 +35,7 @@ has been destroyed.
   s.add_development_dependency "byebug", "~> 10.0"
   s.add_development_dependency "ffaker", "~> 2.8"
   s.add_development_dependency "generator_spec", "~> 0.9.4"
+  s.add_development_dependency "memory_profiler", "~> 0.9.12"
   s.add_development_dependency "mysql2", "~> 0.5.2"
   s.add_development_dependency "paper_trail-association_tracking", "~> 2.0.0"
   s.add_development_dependency "pg", "~> 1.0"


@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require "spec_helper"
+require "support/performance_helpers"

 RSpec.describe(::PaperTrail, versioning: true) do
   context "a new record" do
@@ -868,4 +869,74 @@ RSpec.describe(::PaperTrail, versioning: true) do
       expect(widget.versions.empty?).to(eq(true))
     end
   end
+
+  context "Memory allocation of" do
+    let(:widget) do
+      Widget.new(
+        name: "Warble",
+        a_text: "The quick brown fox",
+        an_integer: 42,
+        a_float: 153.01,
+        a_decimal: 2.71828,
+        a_boolean: true
+      )
+    end
+
+    before do
+      # JSON columns for the `object` & `object_changes` attributes are the most
+      # efficient option: they save even more RAM and skip YAML serialization
+      # entirely.
+      allow(PaperTrail::Version).to receive(:object_changes_col_is_json?).and_return(true)
+      allow(PaperTrail::Version).to receive(:object_col_is_json?).and_return(true)
+
+      # Force the loading of all lazy things, such as class definitions,
+      # so that the benchmark measures only the version building itself.
+      version_building.call
+    end
+
+    describe "#build_version_on_create" do
+      let(:version_building) do
+        -> { widget.paper_trail.build_version_on_create(in_after_callback: false) }
+      end
+
+      it "is frugal enough" do
+        # Before this optimization the example allocated about 95 KB.
+        # At the time of this commit the test passes with an assertion of 17 KB.
+        # Assert 20 KB to avoid flaky failures.
+        expect(&version_building).to allocate_less_than(20).kilobytes
+      end
+    end
+
+    describe "#build_version_on_update" do
+      let(:widget) do
+        super().tap do |w|
+          w.save!
+          w.attributes = {
+            name: "Dostoyevsky",
+            a_text: "The slow yellow mouse",
+            an_integer: 84,
+            a_float: 306.02,
+            a_decimal: 5.43656,
+            a_boolean: false
+          }
+        end
+      end
+
+      let(:version_building) do
+        lambda do
+          widget.paper_trail.build_version_on_update(
+            force: false,
+            in_after_callback: false,
+            is_touch: false
+          )
+        end
+      end
+
+      it "is frugal enough" do
+        # Before this optimization the example allocated about 144 KB.
+        # At the time of this commit the test passes with an assertion of 27 KB.
+        # Assert 35 KB to avoid flaky failures.
+        expect(&version_building).to allocate_less_than(35).kilobytes
+      end
+    end
+  end
 end


@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+require "memory_profiler"
+
+RSpec::Matchers.define :allocate_less_than do |expected|
+  supports_block_expectations
+
+  chain :bytes do
+    @scale = :bs
+  end
+
+  chain :kilobytes do
+    @scale = :kbs
+  end
+
+  chain :and_print_report do
+    @report = true
+  end
+
+  match do |actual|
+    @scale ||= :bs
+    benchmark = MemoryProfiler.report(ignore_files: /rspec/) { actual.call }
+
+    if @report
+      benchmark.pretty_print(detailed_report: true, scale_bytes: true)
+    end
+
+    @allocated = benchmark.total_allocated_memsize
+    @allocated /= 1024 if @scale == :kbs
+    @allocated <= expected
+  end
+
+  failure_message do
+    "expected that the example would allocate less than #{expected}#{@scale},"\
+      " but it allocated #{@allocated}#{@scale}"
+  end
+end
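
Usage note (illustrative, not part of the new file): the matcher defaults to bytes, the `bytes`/`kilobytes` chains pick the unit, and `and_print_report` also dumps the full MemoryProfiler breakdown. The `Widget` example and the 50 KB threshold are arbitrary choices for the sake of the sketch.

```ruby
# Inside an RSpec example:
expect { Widget.new(name: "Warble") }.to allocate_less_than(50).kilobytes

# Same assertion, but also print the detailed MemoryProfiler report.
expect { Widget.new(name: "Warble") }.to allocate_less_than(50).kilobytes.and_print_report
```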