diff --git a/activerecord/CHANGELOG.md b/activerecord/CHANGELOG.md index 0105073ccf..8035aaef2f 100644 --- a/activerecord/CHANGELOG.md +++ b/activerecord/CHANGELOG.md @@ -1,9 +1,25 @@ +* Support descending order for `find_each`, `find_in_batches` and `in_batches`. + + Batch processing methods allow you to work with the records in batches, greatly reducing memory consumption, but records are always batched from oldest id to newest. + + This change allows reversing the order, batching from newest to oldest. This is useful when you need to process newer batches of records first. + + Pass `order: :desc` to yield batches in descending order. The default remains `order: :asc`. + + ```ruby + Person.find_each(order: :desc) do |person| + person.party_all_night! + end + ``` + + *Alexey Vasiliev* + * Fix insert_all with enum values Fixes #38716. *Joel Blum* - + * Add support for `db:rollback:name` for multiple database applications. Multiple database applications will now raise if `db:rollback` is call and recommend using the `db:rollback:[NAME]` to rollback migrations. diff --git a/activerecord/lib/active_record/relation/batches.rb b/activerecord/lib/active_record/relation/batches.rb index 30b8edd0bd..fe3daebe5b 100644 --- a/activerecord/lib/active_record/relation/batches.rb +++ b/activerecord/lib/active_record/relation/batches.rb @@ -37,6 +37,7 @@ module ActiveRecord # * :finish - Specifies the primary key value to end at, inclusive of the value. # * :error_on_ignore - Overrides the application config to specify if an error should be raised when # an order is present in the relation. + # * :order - Specifies the primary key order (can be :asc or :desc). Defaults to :asc. # # Limits are honored, and if present there is no requirement for the batch # size: it can be less than, equal to, or greater than the limit. @@ -57,22 +58,22 @@ module ActiveRecord # person.party_all_night! # end # - # NOTE: It's not possible to set the order. 
That is automatically set to - # ascending on the primary key ("id ASC") to make the batch ordering - # work. This also means that this method only works when the primary key is + # NOTE: Order can be ascending (:asc) or descending (:desc). It defaults to + # ascending on the primary key ("id ASC"). Batches are always ordered by the + # primary key, which means that this method only works when the primary key is # orderable (e.g. an integer or string). # # NOTE: By its nature, batch processing is subject to race conditions if # other processes are modifying the database. - def find_each(start: nil, finish: nil, batch_size: 1000, error_on_ignore: nil) + def find_each(start: nil, finish: nil, batch_size: 1000, error_on_ignore: nil, order: :asc) if block_given? - find_in_batches(start: start, finish: finish, batch_size: batch_size, error_on_ignore: error_on_ignore) do |records| + find_in_batches(start: start, finish: finish, batch_size: batch_size, error_on_ignore: error_on_ignore, order: order) do |records| records.each { |record| yield record } end else - enum_for(:find_each, start: start, finish: finish, batch_size: batch_size, error_on_ignore: error_on_ignore) do + enum_for(:find_each, start: start, finish: finish, batch_size: batch_size, error_on_ignore: error_on_ignore, order: order) do relation = self - apply_limits(relation, start, finish).size + apply_limits(relation, start, finish, order).size end end end @@ -101,6 +102,7 @@ module ActiveRecord # * :finish - Specifies the primary key value to end at, inclusive of the value. # * :error_on_ignore - Overrides the application config to specify if an error should be raised when # an order is present in the relation. + # * :order - Specifies the primary key order (can be :asc or :desc). Defaults to :asc. # # Limits are honored, and if present there is no requirement for the batch # size: it can be less than, equal to, or greater than the limit. @@ -116,23 +118,23 @@ module ActiveRecord # group.each { |person| person.party_all_night! 
} # end # - # NOTE: It's not possible to set the order. That is automatically set to - # ascending on the primary key ("id ASC") to make the batch ordering - # work. This also means that this method only works when the primary key is + # NOTE: Order can be ascending (:asc) or descending (:desc). It defaults to + # ascending on the primary key ("id ASC"). Batches are always ordered by the + # primary key, which means that this method only works when the primary key is # orderable (e.g. an integer or string). # # NOTE: By its nature, batch processing is subject to race conditions if # other processes are modifying the database. - def find_in_batches(start: nil, finish: nil, batch_size: 1000, error_on_ignore: nil) + def find_in_batches(start: nil, finish: nil, batch_size: 1000, error_on_ignore: nil, order: :asc) relation = self unless block_given? - return to_enum(:find_in_batches, start: start, finish: finish, batch_size: batch_size, error_on_ignore: error_on_ignore) do - total = apply_limits(relation, start, finish).size + return to_enum(:find_in_batches, start: start, finish: finish, batch_size: batch_size, error_on_ignore: error_on_ignore, order: order) do + total = apply_limits(relation, start, finish, order).size (total - 1).div(batch_size) + 1 end end - in_batches(of: batch_size, start: start, finish: finish, load: true, error_on_ignore: error_on_ignore) do |batch| + in_batches(of: batch_size, start: start, finish: finish, load: true, error_on_ignore: error_on_ignore, order: order) do |batch| yield batch.to_a end end @@ -165,6 +167,7 @@ module ActiveRecord # * :finish - Specifies the primary key value to end at, inclusive of the value. # * :error_on_ignore - Overrides the application config to specify if an error should be raised when # an order is present in the relation. + # * :order - Specifies the primary key order (can be :asc or :desc). Defaults to :asc. 
# # Limits are honored, and if present there is no requirement for the batch # size, it can be less than, equal, or greater than the limit. @@ -191,19 +194,23 @@ module ActiveRecord # # Person.in_batches.each_record(&:party_all_night!) # - # NOTE: It's not possible to set the order. That is automatically set to - # ascending on the primary key ("id ASC") to make the batch ordering - # consistent. Therefore the primary key must be orderable, e.g. an integer - # or a string. + # NOTE: Order can be ascending (:asc) or descending (:desc); any other value raises + # ArgumentError. It defaults to ascending on the primary key ("id ASC"). Batches are + # always ordered by the primary key, which means that this method only works when the + # primary key is orderable (e.g. an integer or string). # # NOTE: By its nature, batch processing is subject to race conditions if # other processes are modifying the database. - def in_batches(of: 1000, start: nil, finish: nil, load: false, error_on_ignore: nil) + def in_batches(of: 1000, start: nil, finish: nil, load: false, error_on_ignore: nil, order: :asc) relation = self + unless [:asc, :desc].include?(order) + raise ArgumentError, ":order must be :asc or :desc, got #{order.inspect}" + end + unless block_given? return BatchEnumerator.new(of: of, start: start, finish: finish, relation: self) end if arel.orders.present? act_on_ignored_order(error_on_ignore) end @@ -214,8 +221,8 @@ module ActiveRecord batch_limit = remaining if remaining < batch_limit end - relation = relation.reorder(batch_order).limit(batch_limit) - relation = apply_limits(relation, start, finish) + relation = relation.reorder(batch_order(order)).limit(batch_limit) + relation = apply_limits(relation, start, finish, order) relation.skip_query_cache! 
# Retaining the results in the query cache would undermine the point of batching batch_relation = relation @@ -252,28 +259,28 @@ module ActiveRecord end batch_relation = relation.where( - bind_attribute(primary_key, primary_key_offset) { |attr, bind| attr.gt(bind) } + bind_attribute(primary_key, primary_key_offset) { |attr, bind| order == :desc ? attr.lt(bind) : attr.gt(bind) } ) end end private - def apply_limits(relation, start, finish) - relation = apply_start_limit(relation, start) if start - relation = apply_finish_limit(relation, finish) if finish + def apply_limits(relation, start, finish, order) + relation = apply_start_limit(relation, start, order) if start + relation = apply_finish_limit(relation, finish, order) if finish relation end - def apply_start_limit(relation, start) - relation.where(bind_attribute(primary_key, start) { |attr, bind| attr.gteq(bind) }) + def apply_start_limit(relation, start, order) + relation.where(bind_attribute(primary_key, start) { |attr, bind| order == :desc ? attr.lteq(bind) : attr.gteq(bind) }) end - def apply_finish_limit(relation, finish) - relation.where(bind_attribute(primary_key, finish) { |attr, bind| attr.lteq(bind) }) + def apply_finish_limit(relation, finish, order) + relation.where(bind_attribute(primary_key, finish) { |attr, bind| order == :desc ? 
attr.gteq(bind) : attr.lteq(bind) }) end - def batch_order - arel_attribute(primary_key).asc + def batch_order(order) + arel_attribute(primary_key).public_send(order) end def act_on_ignored_order(error_on_ignore) diff --git a/activerecord/test/cases/batches_test.rb b/activerecord/test/cases/batches_test.rb index f36730333b..1f071fb95b 100644 --- a/activerecord/test/cases/batches_test.rb +++ b/activerecord/test/cases/batches_test.rb @@ -154,6 +154,24 @@ class EachTest < ActiveRecord::TestCase end end + def test_find_in_batches_should_quote_batch_order_with_desc_order + c = Post.connection + assert_sql(/ORDER BY #{Regexp.escape(c.quote_table_name("posts.id"))} DESC/) do + Post.find_in_batches(batch_size: 1, order: :desc) do |batch| + assert_kind_of Array, batch + assert_kind_of Post, batch.first + end + end + end + + def test_each_should_raise_if_order_is_invalid + assert_raise(ArgumentError) do + Post.select(:title).find_each(batch_size: 1, order: :invalid) { |post| + flunk "should not call this block" + } + end + end + def test_find_in_batches_should_not_use_records_after_yielding_them_in_case_original_array_is_modified not_a_post = +"not a post" def not_a_post.id; end @@ -413,6 +431,16 @@ class EachTest < ActiveRecord::TestCase end end + def test_in_batches_should_quote_batch_order_with_desc_order + c = Post.connection + assert_sql(/ORDER BY #{Regexp.escape(c.quote_table_name("posts.id"))} DESC/) do + Post.in_batches(of: 1, order: :desc) do |relation| + assert_kind_of ActiveRecord::Relation, relation + assert_kind_of Post, relation.first + end + end + end + def test_in_batches_should_not_use_records_after_yielding_them_in_case_original_array_is_modified not_a_post = +"not a post" def not_a_post.id