From e5319dc9856298f38aa9cdc6ed55e39ad0e8e070 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 18 Oct 2021 16:23:54 +0200 Subject: [PATCH] pack.c: add an offset argument to unpack and unpack1 [Feature #18254] This is useful to avoid repeteadly copying strings when parsing binary formats --- pack.c | 19 ++++++++---- pack.rb | 31 +++++++++++++++----- spec/ruby/core/string/unpack/shared/basic.rb | 20 +++++++++++++ spec/ruby/core/string/unpack1_spec.rb | 20 +++++++++++++ test/ruby/test_pack.rb | 26 ++++++++++++++++ 5 files changed, 103 insertions(+), 13 deletions(-) diff --git a/pack.c b/pack.c index e6a729ab08..1fbbd724d7 100644 --- a/pack.c +++ b/pack.c @@ -944,7 +944,7 @@ hex2num(char c) #define UNPACK_1 2 static VALUE -pack_unpack_internal(VALUE str, VALUE fmt, int mode) +pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset) { #define hexdigits ruby_hexdigits char *s, *send; @@ -973,8 +973,15 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode) StringValue(str); StringValue(fmt); + + if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative"); + len = RSTRING_LEN(str); + if (offset > len) rb_raise(rb_eArgError, "offset outside of string"); + s = RSTRING_PTR(str); - send = s + RSTRING_LEN(str); + send = s + len; + s += offset; + p = RSTRING_PTR(fmt); pend = p + RSTRING_LEN(fmt); @@ -1614,16 +1621,16 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode) } static VALUE -pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt) +pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset) { int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; - return pack_unpack_internal(str, fmt, mode); + return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset)); } static VALUE -pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt) +pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset) { - return pack_unpack_internal(str, fmt, UNPACK_1); + return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset)); } int diff --git a/pack.rb b/pack.rb index e4d629e0f2..8f01861dc9 100644 --- a/pack.rb +++ b/pack.rb @@ -148,10 +148,11 @@ end class String # call-seq: # str.unpack(format) -> anArray + # str.unpack(format, offset: anInteger) -> anArray # # Decodes str (which may contain binary data) according to the - # format string, returning an array of each value extracted. The - # format string consists of a sequence of single-character directives, + # format string, returning an array of each value extracted. + # The format string consists of a sequence of single-character directives, # summarized in the table at the end of this entry. # Each directive may be followed # by a number, indicating the number of times to repeat with this @@ -161,7 +162,15 @@ class String # exclamation mark (``!'') to use the underlying # platform's native size for the specified type; otherwise, it uses a # platform-independent consistent size. Spaces are ignored in the - # format string. See also String#unpack1, Array#pack. + # format string. + # + # The keyword offset can be given to start the decoding after skipping + # the specified amount of bytes: + # "abc".unpack("C*") # => [97, 98, 99] + # "abc".unpack("C*", offset: 2) # => [99] + # "abc".unpack("C*", offset: 4) # => offset outside of string (ArgumentError) + # + # See also String#unpack1, Array#pack. # # "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] # "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] @@ -263,15 +272,23 @@ class String # * J, J! j, and j! are available since Ruby 2.3. # * Q_, Q!, q_, and q! are available since Ruby 2.1. # * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3. - def unpack(fmt) - Primitive.pack_unpack(fmt) + def unpack(fmt, offset: 0) + Primitive.pack_unpack(fmt, offset) end # call-seq: # str.unpack1(format) -> obj + # str.unpack1(format, offset: anInteger) -> obj # # Decodes str (which may contain binary data) according to the # format string, returning the first value extracted. + # + # The keyword offset can be given to start the decoding after skipping + # the specified amount of bytes: + # "abc".unpack1("C*") # => 97 + # "abc".unpack1("C*", offset: 2) # => 99 + # "abc".unpack1("C*", offset: 4) # => offset outside of string (ArgumentError) + # # See also String#unpack, Array#pack. # # Contrast with String#unpack: @@ -287,7 +304,7 @@ class String # # Thus unpack1 is convenient, makes clear the intention and signals # the expected return value to those reading the code. - def unpack1(fmt) - Primitive.pack_unpack1(fmt) + def unpack1(fmt, offset: 0) + Primitive.pack_unpack1(fmt, offset) end end diff --git a/spec/ruby/core/string/unpack/shared/basic.rb b/spec/ruby/core/string/unpack/shared/basic.rb index 332e39d8d1..f636f4689f 100644 --- a/spec/ruby/core/string/unpack/shared/basic.rb +++ b/spec/ruby/core/string/unpack/shared/basic.rb @@ -16,6 +16,12 @@ describe :string_unpack_basic, shared: true do it "raises a TypeError when passed an Integer" do -> { "abc".unpack(1) }.should raise_error(TypeError) end + + ruby_version_is "3.1" do + it "starts unpacking from the given offset" do + "abc".unpack("CC", offset: 1).should == [98, 99] + end + end end describe :string_unpack_no_platform, shared: true do @@ -26,4 +32,18 @@ describe :string_unpack_no_platform, shared: true do it "raises an ArgumentError when the format modifier is '!'" do -> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError) end + + ruby_version_is "3.1" do + it "raises an ArgumentError when the offset is negative" do + -> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError) + end + + it "returns nil if the offset is at the end of the string" do + "a".unpack("C", offset: 1).should == [nil] + end + + it "raises an ArgumentError when the offset is larget than the string" do + -> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError) + end + end end diff --git a/spec/ruby/core/string/unpack1_spec.rb b/spec/ruby/core/string/unpack1_spec.rb index 5fe81733fb..f59bd92d6a 100644 --- a/spec/ruby/core/string/unpack1_spec.rb +++ b/spec/ruby/core/string/unpack1_spec.rb @@ -7,4 +7,24 @@ describe "String#unpack1" do "aG9nZWZ1Z2E=".unpack1("m").should == "hogefuga" "A".unpack1("B*").should == "01000001" end + + ruby_version_is "3.1" do + it "starts unpacking from the given offset" do + "ZZABCD".unpack1('x3C', offset: 2).should == "ABCD".unpack('x3C')[0] + "ZZZZaG9nZWZ1Z2E=".unpack1("m", offset: 4).should == "hogefuga" + "ZA".unpack1("B*", offset: 1).should == "01000001" + end + + it "raises an ArgumentError when the offset is negative" do + -> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError) + end + + it "returns nil if the offset is at the end of the string" do + "a".unpack1("C", offset: 1).should == nil + end + + it "raises an ArgumentError when the offset is larget than the string" do + -> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError) + end + end end diff --git a/test/ruby/test_pack.rb b/test/ruby/test_pack.rb index c9ca9c3dfd..af45adb2b2 100644 --- a/test/ruby/test_pack.rb +++ b/test/ruby/test_pack.rb @@ -869,4 +869,30 @@ EXPECTED assert_equal "hogefuga", "aG9nZWZ1Z2E=".unpack1("m") assert_equal "01000001", "A".unpack1("B*") end + + def test_unpack1_offset + assert_equal 65, "ZA".unpack1("C", offset: 1) + assert_equal "01000001", "YZA".unpack1("B*", offset: 2) + assert_nil "abc".unpack1("C", offset: 3) + assert_raise_with_message(ArgumentError, /offset can't be negative/) { + "a".unpack1("C", offset: -1) + } + assert_raise_with_message(ArgumentError, /offset outside of string/) { + "a".unpack1("C", offset: 2) + } + assert_nil "a".unpack1("C", offset: 1) + end + + def test_unpack_offset + assert_equal [65], "ZA".unpack("C", offset: 1) + assert_equal ["01000001"], "YZA".unpack("B*", offset: 2) + assert_equal [nil, nil, nil], "abc".unpack("CCC", offset: 3) + assert_raise_with_message(ArgumentError, /offset can't be negative/) { + "a".unpack("C", offset: -1) + } + assert_raise_with_message(ArgumentError, /offset outside of string/) { + "a".unpack("C", offset: 2) + } + assert_equal [nil], "a".unpack("C", offset: 1) + end end