#-- # frozen_string_literal: true # # set.rb - defines the Set class #++ # Copyright (c) 2002-2016 Akinori MUSHA <knu@iDaemons.org> # # Documentation by Akinori MUSHA and Gavin Sinclair. # # All rights reserved. You can redistribute and/or modify it under the same # terms as Ruby. # # $Id$ # # == Overview # # This library provides the Set class, which deals with a collection # of unordered values with no duplicates. It is a hybrid of Array's # intuitive inter-operation facilities and Hash's fast lookup. If you # need to keep values sorted in some order, use the SortedSet class. # # The method +to_set+ is added to Enumerable for convenience. # # See the Set and SortedSet documentation for examples of usage. # # Set implements a collection of unordered values with no duplicates. # This is a hybrid of Array's intuitive inter-operation facilities and # Hash's fast lookup. # # Set is easy to use with Enumerable objects (implementing +each+). # Most of the initializer methods and binary operators accept generic # Enumerable objects besides sets and arrays. An Enumerable object # can be converted to Set using the +to_set+ method. # # Set uses Hash as storage, so you must note the following points: # # * Equality of elements is determined according to Object#eql? and # Object#hash. Use Set#compare_by_identity to make a set compare # its elements by their identity. # * Set assumes that the identity of each element does not change # while it is stored. Modifying an element of a set will render the # set to an unreliable state. # * When a string is to be stored, a frozen copy of the string is # stored instead unless the original string is already frozen. # # == Comparison # # The comparison operators <, >, <=, and >= are implemented as # shorthand for the {proper_,}{subset?,superset?} methods. However, # the <=> operator is intentionally left out because not every pair of # sets is comparable ({x, y} vs. {x, z} for example). # # == Example # # require 'set' # s1 = Set[1, 2] #=> #<Set: {1, 2}> # s2 = [1, 2].to_set #=> #<Set: {1, 2}> # s1 == s2 #=> true # s1.add("foo") #=> #<Set: {1, 2, "foo"}> # s1.merge([2, 6]) #=> #<Set: {1, 2, "foo", 6}> # s1.subset?(s2) #=> false # s2.subset?(s1) #=> true # # == Contact # # - Akinori MUSHA <knu@iDaemons.org> (current maintainer) # class Set include Enumerable # Creates a new set containing the given objects. # # Set[1, 2] # => #<Set: {1, 2}> # Set[1, 2, 1] # => #<Set: {1, 2}> # Set[1, 'c', :s] # => #<Set: {1, "c", :s}> def self.[](*ary) new(ary) end # Creates a new set containing the elements of the given enumerable # object. # # If a block is given, the elements of enum are preprocessed by the # given block. # # Set.new([1, 2]) #=> #<Set: {1, 2}> # Set.new([1, 2, 1]) #=> #<Set: {1, 2}> # Set.new([1, 'c', :s]) #=> #<Set: {1, "c", :s}> # Set.new(1..5) #=> #<Set: {1, 2, 3, 4, 5}> # Set.new([1, 2, 3]) { |x| x * x } #=> #<Set: {1, 4, 9}> def initialize(enum = nil, &block) # :yields: o @hash ||= Hash.new(false) enum.nil? and return if block do_with_enum(enum) { |o| add(block[o]) } else merge(enum) end end # Makes the set compare its elements by their identity and returns # self. This method may not be supported by all subclasses of Set. def compare_by_identity if @hash.respond_to?(:compare_by_identity) @hash.compare_by_identity self else raise NotImplementedError, "#{self.class.name}\##{__method__} is not implemented" end end # Returns true if the set will compare its elements by their # identity. Also see Set#compare_by_identity. def compare_by_identity? @hash.respond_to?(:compare_by_identity?) && @hash.compare_by_identity? end def do_with_enum(enum, &block) # :nodoc: if enum.respond_to?(:each_entry) enum.each_entry(&block) if block elsif enum.respond_to?(:each) enum.each(&block) if block else raise ArgumentError, "value must be enumerable" end end private :do_with_enum # Dup internal hash. def initialize_dup(orig) super @hash = orig.instance_variable_get(:@hash).dup end # Clone internal hash. def initialize_clone(orig) super @hash = orig.instance_variable_get(:@hash).clone end def freeze # :nodoc: @hash.freeze super end def taint # :nodoc: @hash.taint super end def untaint # :nodoc: @hash.untaint super end # Returns the number of elements. def size @hash.size end alias length size # Returns true if the set contains no elements. def empty? @hash.empty? end # Removes all elements and returns self. # # set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}> # set.clear #=> #<Set: {}> # set #=> #<Set: {}> def clear @hash.clear self end # Replaces the contents of the set with the contents of the given # enumerable object and returns self. # # set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}> # set.replace([1, 2]) #=> #<Set: {1, 2}> # set #=> #<Set: {1, 2}> def replace(enum) if enum.instance_of?(self.class) @hash.replace(enum.instance_variable_get(:@hash)) self else do_with_enum(enum) # make sure enum is enumerable before calling clear clear merge(enum) end end # Converts the set to an array. The order of elements is uncertain. # # Set[1, 2].to_a #=> [1, 2] # Set[1, 'c', :s].to_a #=> [1, "c", :s] def to_a @hash.keys end # Returns self if no arguments are given. Otherwise, converts the # set to another with klass.new(self, *args, &block). # # In subclasses, returns klass.new(self, *args, &block) unless # overridden. def to_set(klass = Set, *args, &block) return self if instance_of?(Set) && klass == Set && block.nil? && args.empty? klass.new(self, *args, &block) end def flatten_merge(set, seen = Set.new) # :nodoc: set.each { |e| if e.is_a?(Set) if seen.include?(e_id = e.object_id) raise ArgumentError, "tried to flatten recursive Set" end seen.add(e_id) flatten_merge(e, seen) seen.delete(e_id) else add(e) end } self end protected :flatten_merge # Returns a new set that is a copy of the set, flattening each # containing set recursively. def flatten self.class.new.flatten_merge(self) end # Equivalent to Set#flatten, but replaces the receiver with the # result in place. Returns nil if no modifications were made. def flatten! replace(flatten()) if any? { |e| e.is_a?(Set) } end # Returns true if the set contains the given object. # # Note that <code>include?</code> and <code>member?</code> do not test member # equality using <code>==</code> as do other Enumerables. # # See also Enumerable#include? def include?(o) @hash[o] end alias member? include? # Returns true if the set is a superset of the given set. def superset?(set) case when set.instance_of?(self.class) && @hash.respond_to?(:>=) @hash >= set.instance_variable_get(:@hash) when set.is_a?(Set) size >= set.size && set.all? { |o| include?(o) } else raise ArgumentError, "value must be a set" end end alias >= superset? # Returns true if the set is a proper superset of the given set. def proper_superset?(set) case when set.instance_of?(self.class) && @hash.respond_to?(:>) @hash > set.instance_variable_get(:@hash) when set.is_a?(Set) size > set.size && set.all? { |o| include?(o) } else raise ArgumentError, "value must be a set" end end alias > proper_superset? # Returns true if the set is a subset of the given set. def subset?(set) case when set.instance_of?(self.class) && @hash.respond_to?(:<=) @hash <= set.instance_variable_get(:@hash) when set.is_a?(Set) size <= set.size && all? { |o| set.include?(o) } else raise ArgumentError, "value must be a set" end end alias <= subset? # Returns true if the set is a proper subset of the given set. def proper_subset?(set) case when set.instance_of?(self.class) && @hash.respond_to?(:<) @hash < set.instance_variable_get(:@hash) when set.is_a?(Set) size < set.size && all? { |o| set.include?(o) } else raise ArgumentError, "value must be a set" end end alias < proper_subset? # Returns true if the set and the given set have at least one # element in common. # # Set[1, 2, 3].intersect? Set[4, 5] #=> false # Set[1, 2, 3].intersect? Set[3, 4] #=> true def intersect?(set) set.is_a?(Set) or raise ArgumentError, "value must be a set" if size < set.size any? { |o| set.include?(o) } else set.any? { |o| include?(o) } end end # Returns true if the set and the given set have no element in # common. This method is the opposite of +intersect?+. # # Set[1, 2, 3].disjoint? Set[3, 4] #=> false # Set[1, 2, 3].disjoint? Set[4, 5] #=> true def disjoint?(set) !intersect?(set) end # Calls the given block once for each element in the set, passing # the element as parameter. Returns an enumerator if no block is # given. def each(&block) block or return enum_for(__method__) { size } @hash.each_key(&block) self end # Adds the given object to the set and returns self. Use +merge+ to # add many elements at once. # # Set[1, 2].add(3) #=> #<Set: {1, 2, 3}> # Set[1, 2].add([3, 4]) #=> #<Set: {1, 2, [3, 4]}> # Set[1, 2].add(2) #=> #<Set: {1, 2}> def add(o) @hash[o] = true self end alias << add # Adds the given object to the set and returns self. If the # object is already in the set, returns nil. # # Set[1, 2].add?(3) #=> #<Set: {1, 2, 3}> # Set[1, 2].add?([3, 4]) #=> #<Set: {1, 2, [3, 4]}> # Set[1, 2].add?(2) #=> nil def add?(o) add(o) unless include?(o) end # Deletes the given object from the set and returns self. Use +subtract+ to # delete many items at once. def delete(o) @hash.delete(o) self end # Deletes the given object from the set and returns self. If the # object is not in the set, returns nil. def delete?(o) delete(o) if include?(o) end # Deletes every element of the set for which block evaluates to # true, and returns self. Returns an enumerator if no block is # given. def delete_if block_given? or return enum_for(__method__) { size } # @hash.delete_if should be faster, but using it breaks the order # of enumeration in subclasses. select { |o| yield o }.each { |o| @hash.delete(o) } self end # Deletes every element of the set for which block evaluates to # false, and returns self. Returns an enumerator if no block is # given. def keep_if block_given? or return enum_for(__method__) { size } # @hash.keep_if should be faster, but using it breaks the order of # enumeration in subclasses. reject { |o| yield o }.each { |o| @hash.delete(o) } self end # Replaces the elements with ones returned by collect(). # Returns an enumerator if no block is given. def collect! block_given? or return enum_for(__method__) { size } set = self.class.new each { |o| set << yield(o) } replace(set) end alias map! collect! # Equivalent to Set#delete_if, but returns nil if no changes were # made. Returns an enumerator if no block is given. def reject!(&block) block or return enum_for(__method__) { size } n = size delete_if(&block) self if size != n end # Equivalent to Set#keep_if, but returns nil if no changes were # made. Returns an enumerator if no block is given. def select!(&block) block or return enum_for(__method__) { size } n = size keep_if(&block) self if size != n end # Equivalent to Set#select! alias filter! select! # Merges the elements of the given enumerable object to the set and # returns self. def merge(enum) if enum.instance_of?(self.class) @hash.update(enum.instance_variable_get(:@hash)) else do_with_enum(enum) { |o| add(o) } end self end # Deletes every element that appears in the given enumerable object # and returns self. def subtract(enum) do_with_enum(enum) { |o| delete(o) } self end # Returns a new set built by merging the set and the elements of the # given enumerable object. # # Set[1, 2, 3] | Set[2, 4, 5] #=> #<Set: {1, 2, 3, 4, 5}> # Set[1, 5, 'z'] | (1..6) #=> #<Set: {1, 5, "z", 2, 3, 4, 6}> def |(enum) dup.merge(enum) end alias + | alias union | # Returns a new set built by duplicating the set, removing every # element that appears in the given enumerable object. # # Set[1, 3, 5] - Set[1, 5] #=> #<Set: {3}> # Set['a', 'b', 'z'] - ['a', 'c'] #=> #<Set: {"b", "z"}> def -(enum) dup.subtract(enum) end alias difference - # Returns a new set containing elements common to the set and the # given enumerable object. # # Set[1, 3, 5] & Set[3, 2, 1] #=> #<Set: {3, 1}> # Set['a', 'b', 'z'] & ['a', 'b', 'c'] #=> #<Set: {"a", "b"}> def &(enum) n = self.class.new do_with_enum(enum) { |o| n.add(o) if include?(o) } n end alias intersection & # Returns a new set containing elements exclusive between the set # and the given enumerable object. (set ^ enum) is equivalent to # ((set | enum) - (set & enum)). # # Set[1, 2] ^ Set[2, 3] #=> #<Set: {3, 1}> # Set[1, 'b', 'c'] ^ ['b', 'd'] #=> #<Set: {"d", 1, "c"}> def ^(enum) n = Set.new(enum) each { |o| n.add(o) unless n.delete?(o) } n end # Returns true if two sets are equal. The equality of each couple # of elements is defined according to Object#eql?. # # Set[1, 2] == Set[2, 1] #=> true # Set[1, 3, 5] == Set[1, 5] #=> false # Set['a', 'b', 'c'] == Set['a', 'c', 'b'] #=> true # Set['a', 'b', 'c'] == ['a', 'c', 'b'] #=> false def ==(other) if self.equal?(other) true elsif other.instance_of?(self.class) @hash == other.instance_variable_get(:@hash) elsif other.is_a?(Set) && self.size == other.size other.all? { |o| @hash.include?(o) } else false end end def hash # :nodoc: @hash.hash end def eql?(o) # :nodoc: return false unless o.is_a?(Set) @hash.eql?(o.instance_variable_get(:@hash)) end # Resets the internal state after modification to existing elements # and returns self. # # Elements will be reindexed and deduplicated. def reset if @hash.respond_to?(:rehash) @hash.rehash # This should perform frozenness check. else raise FrozenError, "can't modify frozen #{self.class.name}" if frozen? end self end # Returns true if the given object is a member of the set, # and false otherwise. # # Used in case statements: # # require 'set' # # case :apple # when Set[:potato, :carrot] # "vegetable" # when Set[:apple, :banana] # "fruit" # end # # => "fruit" # # Or by itself: # # Set[1, 2, 3] === 2 #=> true # Set[1, 2, 3] === 4 #=> false # alias === include? # Classifies the set by the return value of the given block and # returns a hash of {value => set of elements} pairs. The block is # called once for each element of the set, passing the element as # parameter. # # require 'set' # files = Set.new(Dir.glob("*.rb")) # hash = files.classify { |f| File.mtime(f).year } # hash #=> {2000=>#<Set: {"a.rb", "b.rb"}>, # # 2001=>#<Set: {"c.rb", "d.rb", "e.rb"}>, # # 2002=>#<Set: {"f.rb"}>} # # Returns an enumerator if no block is given. def classify # :yields: o block_given? or return enum_for(__method__) { size } h = {} each { |i| (h[yield(i)] ||= self.class.new).add(i) } h end # Divides the set into a set of subsets according to the commonality # defined by the given block. # # If the arity of the block is 2, elements o1 and o2 are in common # if block.call(o1, o2) is true. Otherwise, elements o1 and o2 are # in common if block.call(o1) == block.call(o2). # # require 'set' # numbers = Set[1, 3, 4, 6, 9, 10, 11] # set = numbers.divide { |i,j| (i - j).abs == 1 } # set #=> #<Set: {#<Set: {1}>, # # #<Set: {11, 9, 10}>, # # #<Set: {3, 4}>, # # #<Set: {6}>}> # # Returns an enumerator if no block is given. def divide(&func) func or return enum_for(__method__) { size } if func.arity == 2 require 'tsort' class << dig = {} # :nodoc: include TSort alias tsort_each_node each_key def tsort_each_child(node, &block) fetch(node).each(&block) end end each { |u| dig[u] = a = [] each{ |v| func.call(u, v) and a << v } } set = Set.new() dig.each_strongly_connected_component { |css| set.add(self.class.new(css)) } set else Set.new(classify(&func).values) end end InspectKey = :__inspect_key__ # :nodoc: # Returns a string containing a human-readable representation of the # set ("#<Set: {element1, element2, ...}>"). def inspect ids = (Thread.current[InspectKey] ||= []) if ids.include?(object_id) return sprintf('#<%s: {...}>', self.class.name) end ids << object_id begin return sprintf('#<%s: {%s}>', self.class, to_a.inspect[1..-2]) ensure ids.pop end end alias to_s inspect def pretty_print(pp) # :nodoc: pp.text sprintf('#<%s: {', self.class.name) pp.nest(1) { pp.seplist(self) { |o| pp.pp o } } pp.text "}>" end def pretty_print_cycle(pp) # :nodoc: pp.text sprintf('#<%s: {%s}>', self.class.name, empty? ? '' : '...') end end # # SortedSet implements a Set that guarantees that its elements are # yielded in sorted order (according to the return values of their # #<=> methods) when iterating over them. # # All elements that are added to a SortedSet must respond to the <=> # method for comparison. # # Also, all elements must be <em>mutually comparable</em>: <tt>el1 <=> # el2</tt> must not return <tt>nil</tt> for any elements <tt>el1</tt> # and <tt>el2</tt>, else an ArgumentError will be raised when # iterating over the SortedSet. # # == Example # # require "set" # # set = SortedSet.new([2, 1, 5, 6, 4, 5, 3, 3, 3]) # ary = [] # # set.each do |obj| # ary << obj # end # # p ary # => [1, 2, 3, 4, 5, 6] # # set2 = SortedSet.new([1, 2, "3"]) # set2.each { |obj| } # => raises ArgumentError: comparison of Fixnum with String failed # class SortedSet < Set @@setup = false @@mutex = Mutex.new class << self def [](*ary) # :nodoc: new(ary) end def setup # :nodoc: @@setup and return @@mutex.synchronize do # a hack to shut up warning alias_method :old_init, :initialize begin require 'rbtree' module_eval <<-END, __FILE__, __LINE__+1 def initialize(*args) @hash = RBTree.new super end def add(o) o.respond_to?(:<=>) or raise ArgumentError, "value must respond to <=>" super end alias << add END rescue LoadError module_eval <<-END, __FILE__, __LINE__+1 def initialize(*args) @keys = nil super end def clear @keys = nil super end def replace(enum) @keys = nil super end def add(o) o.respond_to?(:<=>) or raise ArgumentError, "value must respond to <=>" @keys = nil super end alias << add def delete(o) @keys = nil @hash.delete(o) self end def delete_if block_given? or return enum_for(__method__) { size } n = @hash.size super @keys = nil if @hash.size != n self end def keep_if block_given? or return enum_for(__method__) { size } n = @hash.size super @keys = nil if @hash.size != n self end def merge(enum) @keys = nil super end def each(&block) block or return enum_for(__method__) { size } to_a.each(&block) self end def to_a (@keys = @hash.keys).sort! unless @keys @keys end def freeze to_a super end def rehash @keys = nil super end END end # a hack to shut up warning remove_method :old_init @@setup = true end end end def initialize(*args, &block) # :nodoc: SortedSet.setup @keys = nil super end end module Enumerable # Makes a set from the enumerable object with given arguments. # Needs to +require "set"+ to use this method. def to_set(klass = Set, *args, &block) klass.new(self, *args, &block) end end # =begin # == RestricedSet class # RestricedSet implements a set with restrictions defined by a given # block. # # === Super class # Set # # === Class Methods # --- RestricedSet::new(enum = nil) { |o| ... } # --- RestricedSet::new(enum = nil) { |rset, o| ... } # Creates a new restricted set containing the elements of the given # enumerable object. Restrictions are defined by the given block. # # If the block's arity is 2, it is called with the RestrictedSet # itself and an object to see if the object is allowed to be put in # the set. # # Otherwise, the block is called with an object to see if the object # is allowed to be put in the set. # # === Instance Methods # --- restriction_proc # Returns the restriction procedure of the set. # # =end # # class RestricedSet < Set # def initialize(*args, &block) # @proc = block or raise ArgumentError, "missing a block" # # if @proc.arity == 2 # instance_eval %{ # def add(o) # @hash[o] = true if @proc.call(self, o) # self # end # alias << add # # def add?(o) # if include?(o) || !@proc.call(self, o) # nil # else # @hash[o] = true # self # end # end # # def replace(enum) # enum.respond_to?(:each) or raise ArgumentError, "value must be enumerable" # clear # enum.each_entry { |o| add(o) } # # self # end # # def merge(enum) # enum.respond_to?(:each) or raise ArgumentError, "value must be enumerable" # enum.each_entry { |o| add(o) } # # self # end # } # else # instance_eval %{ # def add(o) # if @proc.call(o) # @hash[o] = true # end # self # end # alias << add # # def add?(o) # if include?(o) || !@proc.call(o) # nil # else # @hash[o] = true # self # end # end # } # end # # super(*args) # end # # def restriction_proc # @proc # end # end # Tests have been moved to test/test_set.rb.