From 4954c9fc0f9d06aa4e3e8deb33b41f3fae294adc Mon Sep 17 00:00:00 2001 From: Vladimir Dementyev Date: Fri, 5 Aug 2022 20:13:09 -0400 Subject: [PATCH] Add MatchData#deconstruct/deconstruct_keys --- NEWS.md | 2 + re.c | 85 ++++++++++++++++++++++++++++++++++++++++ test/ruby/test_regexp.rb | 32 +++++++++++++++ 3 files changed, 119 insertions(+) diff --git a/NEWS.md b/NEWS.md index e675babdcf..a355d91d7d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -141,6 +141,8 @@ Note: We're only listing outstanding class updates. * MatchData * MatchData#byteoffset has been added. [[Feature #13110]] + * MatchData#deconstruct has been added. [[Feature #18821]] + * MatchData#deconstruct_keys has been added. [[Feature #18821]] * Module * Module.used_refinements has been added. [[Feature #14332]] diff --git a/re.c b/re.c index 5091f9a124..ff9d5bccc4 100644 --- a/re.c +++ b/re.c @@ -2283,8 +2283,14 @@ match_named_captures_iter(const OnigUChar *name, const OnigUChar *name_end, struct MEMO *memo = MEMO_CAST(arg); VALUE hash = memo->v1; VALUE match = memo->v2; + long symbolize = memo->u3.state; VALUE key = rb_enc_str_new((const char *)name, name_end-name, regex->enc); + + if (symbolize > 0) { + key = rb_str_intern(key); + } + VALUE value; int i; @@ -2348,6 +2354,83 @@ match_named_captures(VALUE match) return hash; } +/* + * call-seq: + * deconstruct_keys(array_of_names) -> hash + * + * Returns a hash of the named captures for the given names. 
+ * + * m = /(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/.match("18:37:22") + * m.deconstruct_keys([:hours, :minutes]) # => {:hours => "18", :minutes => "37"} + * m.deconstruct_keys(nil) # => {:hours => "18", :minutes => "37", :seconds => "22"} + * + * Returns an empty hash if no named captures were defined: + * + * m = /(\d{2}):(\d{2}):(\d{2})/.match("18:37:22") + * m.deconstruct_keys(nil) # => {} + * + */ +static VALUE +match_deconstruct_keys(VALUE match, VALUE keys) +{ + VALUE h; + long i; + + match_check(match); + + if (NIL_P(RMATCH(match)->regexp)) { + return rb_hash_new_with_size(0); + } + + if (NIL_P(keys)) { + h = rb_hash_new_with_size(onig_number_of_names(RREGEXP_PTR(RMATCH(match)->regexp))); + + struct MEMO *memo; + memo = MEMO_NEW(h, match, 1); + + onig_foreach_name(RREGEXP_PTR(RMATCH(match)->regexp), match_named_captures_iter, (void*)memo); + + return h; + } + + if (UNLIKELY(!RB_TYPE_P(keys, T_ARRAY))) { + rb_raise(rb_eTypeError, + "wrong argument type %"PRIsVALUE" (expected Array or nil)", + rb_obj_class(keys)); + + } + + if (onig_number_of_names(RREGEXP_PTR(RMATCH(match)->regexp)) < RARRAY_LEN(keys)) { + return rb_hash_new_with_size(0); + } + + h = rb_hash_new_with_size(RARRAY_LEN(keys)); + + for (i=0; iregexp, RMATCH(match)->regexp, + RSTRING_PTR(name), RSTRING_END(name)); + + if (num >= 0) { + rb_hash_aset(h, key, rb_reg_nth_match(num, match)); + } else { + return h; + } + } + + return h; +} + /* * call-seq: * string -> string @@ -4542,7 +4625,9 @@ Init_Regexp(void) rb_define_method(rb_cMatch, "to_a", match_to_a, 0); rb_define_method(rb_cMatch, "[]", match_aref, -1); rb_define_method(rb_cMatch, "captures", match_captures, 0); + rb_define_alias(rb_cMatch, "deconstruct", "captures"); rb_define_method(rb_cMatch, "named_captures", match_named_captures, 0); + rb_define_method(rb_cMatch, "deconstruct_keys", match_deconstruct_keys, 1); rb_define_method(rb_cMatch, "values_at", match_values_at, -1); rb_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0); 
rb_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0); diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 1d93d1a5b1..7d7d7e5180 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -608,6 +608,38 @@ class TestRegexp < Test::Unit::TestCase assert_equal('#', m.inspect) end + def test_match_data_deconstruct + m = /foo.+/.match("foobarbaz") + assert_equal([], m.deconstruct) + + m = /(foo).+(baz)/.match("foobarbaz") + assert_equal(["foo", "baz"], m.deconstruct) + + m = /(...)(...)(...)(...)?/.match("foobarbaz") + assert_equal(["foo", "bar", "baz", nil], m.deconstruct) + end + + def test_match_data_deconstruct_keys + m = /foo.+/.match("foobarbaz") + assert_equal({}, m.deconstruct_keys([:a])) + + m = /(?foo).+(?baz)/.match("foobarbaz") + assert_equal({a: "foo", b: "baz"}, m.deconstruct_keys(nil)) + assert_equal({a: "foo", b: "baz"}, m.deconstruct_keys([:a, :b])) + assert_equal({b: "baz"}, m.deconstruct_keys([:b])) + assert_equal({}, m.deconstruct_keys([:c, :a])) + assert_equal({a: "foo"}, m.deconstruct_keys([:a, :c])) + assert_equal({}, m.deconstruct_keys([:a, :b, :c])) + + assert_raise(TypeError) { + m.deconstruct_keys(0) + } + + assert_raise(TypeError) { + m.deconstruct_keys(["a", "b"]) + } + end + def test_initialize assert_raise(ArgumentError) { Regexp.new } assert_equal(/foo/, assert_warning(/ignored/) {Regexp.new(/foo/, Regexp::IGNORECASE)})