From ad651925e365ca18645f05b5e9b2eca9cd5721bc Mon Sep 17 00:00:00 2001 From: Victor Shepelev Date: Fri, 30 Sep 2022 12:23:19 +0300 Subject: [PATCH] Add Data class implementation: Simple immutable value object --- NEWS.md | 6 + array.c | 2 +- internal/array.h | 1 + spec/ruby/core/data/constants_spec.rb | 14 +- struct.c | 529 +++++++++++++++++++++++++- test/ruby/test_data.rb | 170 +++++++++ 6 files changed, 716 insertions(+), 6 deletions(-) create mode 100644 test/ruby/test_data.rb diff --git a/NEWS.md b/NEWS.md index 3cc1983a8c..0aa51bb032 100644 --- a/NEWS.md +++ b/NEWS.md @@ -102,6 +102,11 @@ Note that each entry is kept to a minimum, see links for details. Note: We're only listing outstanding class updates. +* Data + * New core class to represent simple immutable value object. The class is + similar to `Struct` and partially shares an implementation, but has more + lean and strict API. [[Feature #16122]] + * Encoding * Encoding#replicate has been deprecated and will be removed in 3.3. [[Feature #18949]] * The dummy `Encoding::UTF_16` and `Encoding::UTF_32` encodings no longer @@ -323,3 +328,4 @@ The following deprecated APIs are removed. [Feature #18949]: https://bugs.ruby-lang.org/issues/18949 [Feature #19008]: https://bugs.ruby-lang.org/issues/19008 [Feature #19026]: https://bugs.ruby-lang.org/issues/19026 +[Feature #16122]: https://bugs.ruby-lang.org/issues/16122 diff --git a/array.c b/array.c index cacc549a24..73e8a3c9ce 100644 --- a/array.c +++ b/array.c @@ -5587,7 +5587,7 @@ ary_recycle_hash(VALUE hash) * Related: Array#difference. 
*/ -static VALUE +VALUE rb_ary_diff(VALUE ary1, VALUE ary2) { VALUE ary3; diff --git a/internal/array.h b/internal/array.h index 17d91a800b..a0d16dec3f 100644 --- a/internal/array.h +++ b/internal/array.h @@ -35,6 +35,7 @@ void rb_ary_cancel_sharing(VALUE ary); size_t rb_ary_size_as_embedded(VALUE ary); void rb_ary_make_embedded(VALUE ary); bool rb_ary_embeddable_p(VALUE ary); +VALUE rb_ary_diff(VALUE ary1, VALUE ary2); static inline VALUE rb_ary_entry_internal(VALUE ary, long offset); static inline bool ARY_PTR_USING_P(VALUE ary); diff --git a/spec/ruby/core/data/constants_spec.rb b/spec/ruby/core/data/constants_spec.rb index 1d469f9237..d9d55b50f9 100644 --- a/spec/ruby/core/data/constants_spec.rb +++ b/spec/ruby/core/data/constants_spec.rb @@ -14,10 +14,22 @@ ruby_version_is ''...'3.0' do end end -ruby_version_is '3.0' do +ruby_version_is '3.0'...'3.2' do describe "Data" do it "does not exist anymore" do Object.should_not have_constant(:Data) end end end + +ruby_version_is '3.2' do + describe "Data" do + it "is a new constant" do + Data.superclass.should == Object + end + + it "is not deprecated" do + -> { Data }.should_not complain + end + end +end diff --git a/struct.c b/struct.c index 1e7294eb5e..57d7cffc30 100644 --- a/struct.c +++ b/struct.c @@ -28,7 +28,11 @@ enum { AREF_HASH_THRESHOLD = 10 }; +/* Note: Data is a stricter version of the Struct: no attr writers & no + hash-alike/array-alike behavior. It shares most of the implementation + on the C level, but is unrelated on the Ruby level. 
*/ VALUE rb_cStruct; +static VALUE rb_cData; static ID id_members, id_back_members, id_keyword_init; static VALUE struct_alloc(VALUE); @@ -44,7 +48,7 @@ struct_ivar_get(VALUE c, ID id) for (;;) { c = rb_class_superclass(c); - if (c == 0 || c == rb_cStruct) + if (c == 0 || c == rb_cStruct || c == rb_cData) return Qnil; RUBY_ASSERT(RB_TYPE_P(c, T_CLASS)); ivar = rb_attr_get(c, id); @@ -297,6 +301,29 @@ rb_struct_s_inspect(VALUE klass) return inspect; } +static VALUE +rb_data_s_new(int argc, const VALUE *argv, VALUE klass) +{ + if (rb_keyword_given_p()) { + if (argc > 1 || !RB_TYPE_P(argv[0], T_HASH)) { + rb_error_arity(argc, 0, 0); + } + return rb_class_new_instance_pass_kw(argc, argv, klass); + } + else { + VALUE members = struct_ivar_get(klass, id_members); + int num_members = RARRAY_LENINT(members); + + rb_check_arity(argc, 0, num_members); + VALUE arg_hash = rb_hash_new_with_size(argc); + for (long i=0; i # + * + * # Keyword arguments constructor is provided + * weight = Measure.new(amount: 50, unit: 'kg') + * #=> # + * + * # Alternative form to construct an object: + * speed = Measure[10, 'mPh'] + * #=> # + * + * # Works with keyword arguments, too: + * area = Measure[amount: 1.5, unit: 'm^2'] + * #=> # + * + * # Argument accessors are provided: + * distance.amount #=> 100 + * distance.unit #=> "km" + * + * Constructed object also has a reasonable definitions of #== + * operator, #to_h hash conversion, and #deconstruct/#deconstruct_keys + * to be used in pattern matching. + * + * ::define method accepts an optional block and evaluates it in + * the context of the newly defined class. 
That allows defining + * additional methods: + * + * Measure = Data.define(:amount, :unit) do + * def <=>(other) + * return unless other.is_a?(self.class) && other.unit == unit + * amount <=> other.amount + * end + * + * include Comparable + * end + * + * Measure[3, 'm'] < Measure[5, 'm'] #=> true + * Measure[3, 'm'] < Measure[5, 'kg'] + * # comparison of Measure with Measure failed (ArgumentError) + * + * Data provides no member writers or enumerators: it is meant + * to be a storage for immutable atomic values. But note that + * if any of the data members is of a mutable class, Data does no additional + * immutability enforcement: + * + * Event = Data.define(:time, :weekdays) + * event = Event.new('18:00', %w[Tue Wed Fri]) + * #=> # + * + * # There are no #time= or #weekdays= accessors, but changes are + * # still possible: + * event.weekdays << 'Sat' + * event + * #=> # + * + * See also Struct, which is a similar concept, but has a more + * container-like API, allowing the contents of the object to be changed + * and enumerated. + */ + +/* + * call-seq: + * define(name, *symbols) -> class + * define(*symbols) -> class + * + * Defines a new \Data class. If the first argument is a string, the class + * is stored in Data:: constant. + * + * measure = Data.define(:amount, :unit) + * #=> # + * measure.new(1, 'km') + * #=> # + * + * # If you store the new class in a constant, it will + * # affect #inspect and will be more natural to use: + * Measure = Data.define(:amount, :unit) + * #=> Measure + * Measure.new(1, 'km') + * #=> # + * + * + * Note that member-less \Data is acceptable and might be a useful technique + * for defining several homogeneous data classes, like + * + * class HTTPFetcher + * Response = Data.define(:body) + * NotFound = Data.define + * # ...
implementation + * end + * + * Now, different kinds of responses from +HTTPFetcher+ would have consistent + * representation: + * + * # + * # + * + * And they are convenient to use in pattern matching: + * + * case fetcher.get(url) + * in HTTPFetcher::Response(body) + * # process body variable + * in HTTPFetcher::NotFound + * # handle not found case + * end + */ + +static VALUE +rb_data_s_def(int argc, VALUE *argv, VALUE klass) +{ + VALUE rest; + long i; + VALUE data_class; + st_table *tbl; + + rest = rb_ident_hash_new(); + RBASIC_CLEAR_CLASS(rest); + OBJ_WB_UNPROTECT(rest); + tbl = RHASH_TBL_RAW(rest); + for (i=0; i array_of_symbols + * + * Returns an array of member names of the data class: + * + * Measure = Data.define(:amount, :unit) + * Measure.members # => [:amount, :unit] + * + */ + +#define rb_data_s_members_m rb_struct_s_members_m + + +/* + * call-seq: + * new(*args) -> instance + * new(**kwargs) -> instance + * ::[](*args) -> instance + * ::[](**kwargs) -> instance + * + * Constructors for classes defined with ::define accept both positional and + * keyword arguments. + * + * Measure = Data.define(:amount, :unit) + * + * Measure.new(1, 'km') + * #=> # + * Measure.new(amount: 1, unit: 'km') + * #=> # + * + * # Alternative shorter initialization with [] + * Measure[1, 'km'] + * #=> # + * Measure[amount: 1, unit: 'km'] + * #=> # + * + * All arguments are mandatory (unlike Struct), and converted to keyword arguments: + * + * Measure.new(amount: 1) + * # in `initialize': missing keyword: :unit (ArgumentError) + * + * Measure.new(1) + * # in `initialize': missing keyword: :unit (ArgumentError) + * + * Note that Measure#initialize always receives keyword arguments, and that + * mandatory arguments are checked in +initialize+, not in +new+.
This can be + * important for redefining initialize in order to convert arguments or provide + * defaults: + * + * Measure = Data.define(:amount, :unit) do + * NONE = Data.define + * + * def initialize(amount:, unit: NONE.new) + * super(amount: Float(amount), unit:) + * end + * end + * + * Measure.new('10', 'km') # => # + * Measure.new(10_000) # => #> + * + */ + +static VALUE +rb_data_initialize_m(int argc, const VALUE *argv, VALUE self) +{ + VALUE klass = rb_obj_class(self); + rb_struct_modify(self); + VALUE members = struct_ivar_get(klass, id_members); + size_t num_members = RARRAY_LEN(members); + + if (argc > 1 || !RB_TYPE_P(argv[0], T_HASH)) { + rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 0)", argc); + } + + if (RHASH_SIZE(argv[0]) < num_members) { + VALUE missing = rb_ary_diff(members, rb_hash_keys(argv[0])); + rb_exc_raise(rb_keyword_error_new("missing", missing)); + } + + struct struct_hash_set_arg arg; + rb_mem_clear((VALUE *)RSTRUCT_CONST_PTR(self), num_members); + arg.self = self; + arg.unknown_keywords = Qnil; + rb_hash_foreach(argv[0], struct_hash_set_i, (VALUE)&arg); + if (arg.unknown_keywords != Qnil) { + rb_exc_raise(rb_keyword_error_new("unknown", arg.unknown_keywords)); + } + return Qnil; +} + +/* + * call-seq: + * inspect -> string + * to_s -> string + * + * Returns a string representation of +self+: + * + * Measure = Data.define(:amount, :unit) + * + * distance = Measure[10, 'km'] + * + * p distance # uses #inspect underneath + * # + * + * puts distance # uses #to_s underneath, same representation + * # + * + */ + +static VALUE +rb_data_inspect(VALUE s) +{ + return rb_exec_recursive(inspect_struct, s, rb_str_new2("# true or false + * + * Returns +true+ if +other+ is the same class as +self+, and all members are + * equal. 
+ * + * Examples: + * + * Measure = Data.define(:amount, :unit) + * + * Measure[1, 'km'] == Measure[1, 'km'] #=> true + * Measure[1, 'km'] == Measure[2, 'km'] #=> false + * Measure[1, 'km'] == Measure[1, 'm'] #=> false + * + * Measurement = Data.define(:amount, :unit) + * # Even though Measurement and Measure have the same "shape" + * # their instances are never equal + * Measure[1, 'km'] == Measurement[1, 'km'] #=> false + */ + +#define rb_data_equal rb_struct_equal + +/* + * call-seq: + * self.eql?(other) -> true or false + * + * Equality check that is used when two items of data are keys of a Hash. + * + * The subtle difference with #== is that members are also compared with their + * #eql? method, which might be important in some cases: + * + * Measure = Data.define(:amount, :unit) + * + * Measure[1, 'km'] == Measure[1.0, 'km'] #=> true, they are equal as values + * # ...but... + * Measure[1, 'km'].eql? Measure[1.0, 'km'] #=> false, they represent different hash keys + * + * See also Object#eql? for further explanations of the method usage. + */ + +#define rb_data_eql rb_struct_eql + +/* + * call-seq: + * hash -> integer + * + * Redefines Object#hash (used to distinguish objects as Hash keys) so that + * data objects of the same class with the same content would have the same +hash+ + * value, and represent the same Hash key. + * + * Measure = Data.define(:amount, :unit) + * + * Measure[1, 'km'].hash == Measure[1, 'km'].hash #=> true + * Measure[1, 'km'].hash == Measure[10, 'km'].hash #=> false + * Measure[1, 'km'].hash == Measure[1, 'm'].hash #=> false + * Measure[1, 'km'].hash == Measure[1.0, 'km'].hash #=> false + * + * # Structurally similar data class, but shouldn't be considered + * # the same hash key + * Measurement = Data.define(:amount, :unit) + * + * Measure[1, 'km'].hash == Measurement[1, 'km'].hash #=> false + */ + +#define rb_data_hash rb_struct_hash + +/* + * call-seq: + * to_h -> hash + * to_h {|name, value| ...
} -> hash + * + * Returns Hash representation of the data object. + * + * Measure = Data.define(:amount, :unit) + * distance = Measure[10, 'km'] + * + * distance.to_h + * #=> {:amount=>10, :unit=>"km"} + * + * Like Enumerable#to_h, if the block is provided, it is expected to + * produce key-value pairs to construct a hash: + * + * + * distance.to_h { |name, val| [name.to_s, val.to_s] } + * #=> {"amount"=>"10", "unit"=>"km"} + * + * Note that there is a useful symmetry between #to_h and #initialize: + * + * distance2 = Measure.new(**distance.to_h) + * #=> # + * distance2 == distance + * #=> true + */ + +#define rb_data_to_h rb_struct_to_h + +/* + * call-seq: + * members -> array_of_symbols + * + * Returns the member names from +self+ as an array: + * + * Measure = Data.define(:amount, :unit) + * distance = Measure[10, 'km'] + * + * distance.members #=> [:amount, :unit] + * + */ + +#define rb_data_members_m rb_struct_members_m + +/* + * call-seq: + * deconstruct -> array + * + * Returns the values in +self+ as an array, to use in pattern matching: + * + * Measure = Data.define(:amount, :unit) + * + * distance = Measure[10, 'km'] + * distance.deconstruct #=> [10, "km"] + * + * # usage + * case distance + * in n, 'km' # calls #deconstruct underneath + * puts "It is #{n} kilometers away" + * else + * puts "Don't know how to handle it" + * end + * # prints "It is 10 kilometers away" + * + * Or, with checking the class, too: + * + * case distance + * in Measure(n, 'km') + * puts "It is #{n} kilometers away" + * # ... + * end + */ + +#define rb_data_deconstruct rb_struct_to_a + +/* + * call-seq: + * deconstruct_keys(array_of_names_or_nil) -> hash + * + * Returns a hash of the name/value pairs, to use in pattern matching. 
+ * + * Measure = Data.define(:amount, :unit) + * + * distance = Measure[10, 'km'] + * distance.deconstruct_keys(nil) #=> {:amount=>10, :unit=>"km"} + * distance.deconstruct_keys([:amount]) #=> {:amount=>10} + * + * # usage + * case distance + * in amount:, unit: 'km' # calls #deconstruct_keys underneath + * puts "It is #{amount} kilometers away" + * else + * puts "Don't know how to handle it" + * end + * # prints "It is 10 kilometers away" + * + * Or, with checking the class, too: + * + * case distance + * in Measure(amount:, unit: 'km') + * puts "It is #{amount} kilometers away" + * # ... + * end + */ + +#define rb_data_deconstruct_keys rb_struct_deconstruct_keys + /* * Document-class: Struct * @@ -1568,6 +2062,9 @@ rb_struct_dig(int argc, VALUE *argv, VALUE self) * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], * which provides dozens of additional methods. * + * See also Data, which is a somewhat similar, but stricter concept for defining immutable + * value objects. 
+ * * Here, class \Struct provides methods that are useful for: * * - {Creating a Struct Subclass}[rdoc-ref:Struct@Methods+for+Creating+a+Struct+Subclass] @@ -1663,6 +2160,30 @@ InitVM_Struct(void) rb_define_method(rb_cStruct, "deconstruct", rb_struct_to_a, 0); rb_define_method(rb_cStruct, "deconstruct_keys", rb_struct_deconstruct_keys, 1); + + rb_cData = rb_define_class("Data", rb_cObject); + + rb_undef_method(CLASS_OF(rb_cData), "new"); + rb_undef_alloc_func(rb_cData); + rb_define_singleton_method(rb_cData, "define", rb_data_s_def, -1); + + rb_define_singleton_method(rb_cData, "members", rb_data_s_members_m, 0); + + rb_define_method(rb_cData, "initialize", rb_data_initialize_m, -1); + rb_define_method(rb_cData, "initialize_copy", rb_struct_init_copy, 1); + + rb_define_method(rb_cData, "==", rb_data_equal, 1); + rb_define_method(rb_cData, "eql?", rb_data_eql, 1); + rb_define_method(rb_cData, "hash", rb_data_hash, 0); + + rb_define_method(rb_cData, "inspect", rb_data_inspect, 0); + rb_define_alias(rb_cData, "to_s", "inspect"); + rb_define_method(rb_cData, "to_h", rb_data_to_h, 0); + + rb_define_method(rb_cData, "members", rb_data_members_m, 0); + + rb_define_method(rb_cData, "deconstruct", rb_data_deconstruct, 0); + rb_define_method(rb_cData, "deconstruct_keys", rb_data_deconstruct_keys, 1); } #undef rb_intern diff --git a/test/ruby/test_data.rb b/test/ruby/test_data.rb new file mode 100644 index 0000000000..0117cd22c5 --- /dev/null +++ b/test/ruby/test_data.rb @@ -0,0 +1,170 @@ +# -*- coding: us-ascii -*- +# frozen_string_literal: false +require 'test/unit' +require 'timeout' + +class TestData < Test::Unit::TestCase + def test_define + klass = Data.define(:foo, :bar) + assert_kind_of(Class, klass) + assert_equal(%i[foo bar], klass.members) + + assert_raise(NoMethodError) { Data.new(:foo) } + assert_raise(TypeError) { Data.define(0) } + + # Because some code is shared with Struct, check we don't share unnecessary functionality + assert_raise(TypeError) { 
Data.define(:foo, keyword_init: true) } + end + + def test_define_edge_cases + # non-ascii + klass = Data.define(:"r\u{e9}sum\u{e9}") + o = klass.new(1) + assert_equal(1, o.send(:"r\u{e9}sum\u{e9}")) + + # junk string + klass = Data.define(:"a\000") + o = klass.new(1) + assert_equal(1, o.send(:"a\000")) + + # special characters in attribute names + klass = Data.define(:a, :b?) + x = Object.new + o = klass.new("test", x) + assert_same(x, o.b?) + + klass = Data.define(:a, :b!) + x = Object.new + o = klass.new("test", x) + assert_same(x, o.b!) + + assert_raise(ArgumentError) { Data.define(:x=) } + assert_raise(ArgumentError, /duplicate member/) { Data.define(:x, :x) } + end + + def test_define_with_block + klass = Data.define(:a, :b) do + def c + a + b + end + end + + assert_equal(3, klass.new(1, 2).c) + end + + def test_initialize + klass = Data.define(:foo, :bar) + + # Regular + test = klass.new(1, 2) + assert_equal(1, test.foo) + assert_equal(2, test.bar) + assert_equal(test, klass.new(1, 2)) + + # Keywords + test_kw = klass.new(foo: 1, bar: 2) + assert_equal(1, test_kw.foo) + assert_equal(2, test_kw.bar) + assert_equal(test_kw, klass.new(foo: 1, bar: 2)) + assert_equal(test_kw, test) + + # Wrong protocol + assert_raise(ArgumentError) { klass.new(1) } + assert_raise(ArgumentError) { klass.new(1, 2, 3) } + assert_raise(ArgumentError) { klass.new(foo: 1) } + assert_raise(ArgumentError) { klass.new(foo: 1, bar: 2, baz: 3) } + # Could be converted to foo: 1, bar: 2, but too smart is confusing + assert_raise(ArgumentError) { klass.new(1, bar: 2) } + end + + def test_initialize_redefine + klass = Data.define(:foo, :bar) do + attr_reader :passed + + def initialize(*args, **kwargs) + @passed = [args, kwargs] + super(foo: 1, bar: 2) # so we can experiment with passing wrong numbers of args + end + end + + assert_equal([[], {foo: 1, bar: 2}], klass.new(foo: 1, bar: 2).passed) + + # Positional arguments are converted to keyword ones + assert_equal([[], {foo: 1, bar: 2}], 
klass.new(1, 2).passed) + + # Missing arguments can be fixed in initialize + assert_equal([[], {foo: 1}], klass.new(foo: 1).passed) + + # Extra keyword arguments can be dropped in initialize + assert_equal([[], {foo: 1, bar: 2, baz: 3}], klass.new(foo: 1, bar: 2, baz: 3).passed) + end + + def test_instance_behavior + klass = Data.define(:foo, :bar) + + test = klass.new(1, 2) + assert_equal(1, test.foo) + assert_equal(2, test.bar) + assert_equal(%i[foo bar], test.members) + assert_equal(1, test.public_send(:foo)) + assert_equal(0, test.method(:foo).arity) + assert_equal([], test.method(:foo).parameters) + + assert_equal({foo: 1, bar: 2}, test.to_h) + assert_equal({"foo"=>"1", "bar"=>"2"}, test.to_h { [_1.to_s, _2.to_s] }) + + assert_equal({foo: 1, bar: 2}, test.deconstruct_keys(nil)) + assert_equal({foo: 1}, test.deconstruct_keys(%i[foo])) + assert_equal({foo: 1}, test.deconstruct_keys(%i[foo baz])) + assert_raise(TypeError) { test.deconstruct_keys(0) } + + assert_kind_of(Integer, test.hash) + end + + def test_inspect + klass = Data.define(:a) + o = klass.new(1) + assert_equal("#", o.inspect) + + Object.const_set(:Foo, klass) + assert_equal("#", o.inspect) + Object.instance_eval { remove_const(:Foo) } + + klass = Data.define(:@a) + o = klass.new(1) + assert_equal("#", o.inspect) + end + + def test_equal + klass1 = Data.define(:a) + klass2 = Data.define(:a) + o1 = klass1.new(1) + o2 = klass1.new(1) + o3 = klass2.new(1) + assert_equal(o1, o2) + assert_not_equal(o1, o3) + end + + def test_eql + klass1 = Data.define(:a) + klass2 = Data.define(:a) + o1 = klass1.new(1) + o2 = klass1.new(1) + o3 = klass2.new(1) + assert_operator(o1, :eql?, o2) + assert_not_operator(o1, :eql?, o3) + end + + def test_memberless + klass = Data.define + + test = klass.new + + assert_equal(klass.new, test) + assert_not_equal(Data.define.new, test) + + assert_equal('#', test.inspect) + assert_equal([], test.members) + assert_equal({}, test.to_h) + end +end