From dc3cc6c608b93209b23bbebd2ade04835abd6f6c Mon Sep 17 00:00:00 2001 From: Pratik Naik Date: Fri, 12 Feb 2010 16:53:51 +0000 Subject: [PATCH] Move batch finders to Relation --- activerecord/lib/active_record.rb | 2 +- activerecord/lib/active_record/base.rb | 3 +- activerecord/lib/active_record/batches.rb | 79 ------------------- activerecord/lib/active_record/relation.rb | 2 +- .../lib/active_record/relation/batches.rb | 78 ++++++++++++++++++ 5 files changed, 82 insertions(+), 82 deletions(-) delete mode 100644 activerecord/lib/active_record/batches.rb create mode 100644 activerecord/lib/active_record/relation/batches.rb diff --git a/activerecord/lib/active_record.rb b/activerecord/lib/active_record.rb index 9535c576be..b79da4565d 100644 --- a/activerecord/lib/active_record.rb +++ b/activerecord/lib/active_record.rb @@ -55,10 +55,10 @@ module ActiveRecord autoload :Calculations autoload :PredicateBuilder autoload :SpawnMethods + autoload :Batches end autoload :Base - autoload :Batches autoload :Callbacks autoload :DynamicFinderMatch autoload :DynamicScopeMatch diff --git a/activerecord/lib/active_record/base.rb b/activerecord/lib/active_record/base.rb index c4e07e8786..f30eba4f06 100755 --- a/activerecord/lib/active_record/base.rb +++ b/activerecord/lib/active_record/base.rb @@ -557,6 +557,7 @@ module ActiveRecord #:nodoc: alias :colorize_logging= :colorize_logging delegate :find, :first, :last, :all, :destroy, :destroy_all, :exists?, :delete, :delete_all, :update, :update_all, :to => :scoped + delegate :find_each, :find_in_batches, :to => :scoped delegate :select, :group, :order, :limit, :joins, :where, :preload, :eager_load, :includes, :from, :lock, :readonly, :having, :to => :scoped delegate :count, :average, :minimum, :maximum, :sum, :calculate, :to => :scoped @@ -2394,7 +2395,7 @@ module ActiveRecord #:nodoc: # #save_with_autosave_associations to be wrapped inside a transaction. 
include AutosaveAssociation, NestedAttributes - include Aggregations, Transactions, Reflection, Batches, Serialization + include Aggregations, Transactions, Reflection, Serialization end end diff --git a/activerecord/lib/active_record/batches.rb b/activerecord/lib/active_record/batches.rb deleted file mode 100644 index e41d38fb8f..0000000000 --- a/activerecord/lib/active_record/batches.rb +++ /dev/null @@ -1,79 +0,0 @@ -module ActiveRecord - module Batches # :nodoc: - extend ActiveSupport::Concern - - # When processing large numbers of records, it's often a good idea to do - # so in batches to prevent memory ballooning. - module ClassMethods - # Yields each record that was found by the find +options+. The find is - # performed by find_in_batches with a batch size of 1000 (or as - # specified by the :batch_size option). - # - # Example: - # - # Person.find_each(:conditions => "age > 21") do |person| - # person.party_all_night! - # end - # - # Note: This method is only intended to use for batch processing of - # large amounts of records that wouldn't fit in memory all at once. If - # you just need to loop over less than 1000 records, it's probably - # better just to use the regular find methods. - def find_each(options = {}) - find_in_batches(options) do |records| - records.each { |record| yield record } - end - - self - end - - # Yields each batch of records that was found by the find +options+ as - # an array. The size of each batch is set by the :batch_size - # option; the default is 1000. - # - # You can control the starting point for the batch processing by - # supplying the :start option. This is especially useful if you - # want multiple workers dealing with the same processing queue. You can - # make worker 1 handle all the records between id 0 and 10,000 and - # worker 2 handle from 10,000 and beyond (by setting the :start - # option on that worker). - # - # It's not possible to set the order. 
That is automatically set to - # ascending on the primary key ("id ASC") to make the batch ordering - # work. This also mean that this method only works with integer-based - # primary keys. You can't set the limit either, that's used to control - # the the batch sizes. - # - # Example: - # - # Person.find_in_batches(:conditions => "age > 21") do |group| - # sleep(50) # Make sure it doesn't get too crowded in there! - # group.each { |person| person.party_all_night! } - # end - def find_in_batches(options = {}) - raise "You can't specify an order, it's forced to be #{batch_order}" if options[:order] - raise "You can't specify a limit, it's forced to be the batch_size" if options[:limit] - - start = options.delete(:start).to_i - batch_size = options.delete(:batch_size) || 1000 - - with_scope(:find => options.merge(:order => batch_order, :limit => batch_size)) do - records = find(:all, :conditions => [ "#{table_name}.#{primary_key} >= ?", start ]) - - while records.any? - yield records - - break if records.size < batch_size - records = find(:all, :conditions => [ "#{table_name}.#{primary_key} > ?", records.last.id ]) - end - end - end - - - private - def batch_order - "#{table_name}.#{primary_key} ASC" - end - end - end -end \ No newline at end of file diff --git a/activerecord/lib/active_record/relation.rb b/activerecord/lib/active_record/relation.rb index 1a96cdad17..7bc3d3bf33 100644 --- a/activerecord/lib/active_record/relation.rb +++ b/activerecord/lib/active_record/relation.rb @@ -5,7 +5,7 @@ module ActiveRecord MULTI_VALUE_METHODS = [:select, :group, :order, :joins, :where, :having] SINGLE_VALUE_METHODS = [:limit, :offset, :lock, :readonly, :create_with, :from] - include FinderMethods, Calculations, SpawnMethods, QueryMethods + include FinderMethods, Calculations, SpawnMethods, QueryMethods, Batches delegate :length, :collect, :map, :each, :all?, :include?, :to => :to_a delegate :insert, :to => :arel diff --git 
a/activerecord/lib/active_record/relation/batches.rb b/activerecord/lib/active_record/relation/batches.rb new file mode 100644 index 0000000000..4a260d4caa --- /dev/null +++ b/activerecord/lib/active_record/relation/batches.rb @@ -0,0 +1,78 @@ +module ActiveRecord + module Batches # :nodoc: + # Yields each record that was found by the find +options+. The find is + # performed by find_in_batches with a batch size of 1000 (or as + # specified by the :batch_size option). + # + # Example: + # + # Person.where("age > 21").find_each do |person| + # person.party_all_night! + # end + # + # Note: This method is only intended to be used for batch processing of + # large amounts of records that wouldn't fit in memory all at once. If + # you just need to loop over less than 1000 records, it's probably + # better just to use the regular find methods. + def find_each(options = {}) + find_in_batches(options) do |records| + records.each { |record| yield record } + end + + self + end + + # Yields each batch of records that was found by the find +options+ as + # an array. The size of each batch is set by the :batch_size + # option; the default is 1000. + # + # You can control the starting point for the batch processing by + # supplying the :start option. This is especially useful if you + # want multiple workers dealing with the same processing queue. You can + # make worker 1 handle all the records between id 0 and 10,000 and + # worker 2 handle from 10,000 and beyond (by setting the :start + # option on that worker). + # + # It's not possible to set the order. That is automatically set to + # ascending on the primary key ("id ASC") to make the batch ordering + # work. This also means that this method only works with integer-based + # primary keys. You can't set the limit either, that's used to control + # the batch sizes. + # + # Example: + # + # Person.where("age > 21").find_in_batches do |group| + # sleep(50) # Make sure it doesn't get too crowded in there! 
+ # group.each { |person| person.party_all_night! } + # end + def find_in_batches(options = {}) + relation = self + + if (finder_options = options.except(:start, :batch_size)).present? + raise "You can't specify an order, it's forced to be #{batch_order}" if options[:order].present? + raise "You can't specify a limit, it's forced to be the batch_size" if options[:limit].present? + + relation = apply_finder_options(finder_options) + end + + start = options.delete(:start).to_i + batch_size = options.delete(:batch_size) || 1000 + + relation = relation.except(:order).order(batch_order).limit(batch_size) + records = relation.where(primary_key.gteq(start)).all + + while records.any? + yield records + + break if records.size < batch_size + records = relation.where(primary_key.gt(records.last.id)).all + end + end + + private + + def batch_order + "#{@klass.table_name}.#{@klass.primary_key} ASC" + end + end +end \ No newline at end of file