From e5319dc9856298f38aa9cdc6ed55e39ad0e8e070 Mon Sep 17 00:00:00 2001
From: Jean Boussier <jean.boussier@gmail.com>
Date: Mon, 18 Oct 2021 16:23:54 +0200
Subject: [PATCH] pack.c: add an offset argument to unpack and unpack1

[Feature #18254]

This is useful to avoid repeteadly copying strings when parsing binary formats
---
 pack.c                                       | 19 ++++++++----
 pack.rb                                      | 31 +++++++++++++++-----
 spec/ruby/core/string/unpack/shared/basic.rb | 20 +++++++++++++
 spec/ruby/core/string/unpack1_spec.rb        | 20 +++++++++++++
 test/ruby/test_pack.rb                       | 26 ++++++++++++++++
 5 files changed, 103 insertions(+), 13 deletions(-)
diff --git a/pack.c b/pack.c
index e6a729ab08..1fbbd724d7 100644
--- a/pack.c
+++ b/pack.c
@@ -944,7 +944,7 @@ hex2num(char c)
 #define UNPACK_1 2
 
 static VALUE
-pack_unpack_internal(VALUE str, VALUE fmt, int mode)
+pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset)
 {
 #define hexdigits ruby_hexdigits
     char *s, *send;
@@ -973,8 +973,15 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode)
 
     StringValue(str);
     StringValue(fmt);
+
+    if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
+    len = RSTRING_LEN(str);
+    if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
+
     s = RSTRING_PTR(str);
-    send = s + RSTRING_LEN(str);
+    send = s + len;
+    s += offset;
+
     p = RSTRING_PTR(fmt);
     pend = p + RSTRING_LEN(fmt);
 
@@ -1614,16 +1621,16 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode)
 }
 
 static VALUE
-pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt)
+pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
 {
     int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
-    return pack_unpack_internal(str, fmt, mode);
+    return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
 }
 
 static VALUE
-pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt)
+pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
 {
-    return pack_unpack_internal(str, fmt, UNPACK_1);
+    return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
 }
 
 int
diff --git a/pack.rb b/pack.rb
index e4d629e0f2..8f01861dc9 100644
--- a/pack.rb
+++ b/pack.rb
@@ -148,10 +148,11 @@ end
 class String
   # call-seq:
   #    str.unpack(format)    ->  anArray
+  #    str.unpack(format, offset: anInteger)    ->  anArray
   #
   # Decodes <i>str</i> (which may contain binary data) according to the
-  # format string, returning an array of each value extracted. The
-  # format string consists of a sequence of single-character directives,
+  # format string, returning an array of each value extracted.
+  # The format string consists of a sequence of single-character directives,
   # summarized in the table at the end of this entry.
   # Each directive may be followed
   # by a number, indicating the number of times to repeat with this
@@ -161,7 +162,15 @@ class String
   # exclamation mark (``<code>!</code>'') to use the underlying
   # platform's native size for the specified type; otherwise, it uses a
   # platform-independent consistent size. Spaces are ignored in the
-  # format string. See also String#unpack1,  Array#pack.
+  # format string.
+  #
+  # The keyword <i>offset</i> can be given to start the decoding after skipping
+  # the specified amount of bytes:
+  #   "abc".unpack("C*") # => [97, 98, 99]
+  #   "abc".unpack("C*", offset: 2) # => [99]
+  #   "abc".unpack("C*", offset: 4) # => offset outside of string (ArgumentError)
+  #
+  # See also String#unpack1,  Array#pack.
   #
   #    "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
   #    "abc \0\0".unpack('a3a3')           #=> ["abc", " \000\000"]
@@ -263,15 +272,23 @@ class String
   # * J, J! j, and j! are available since Ruby 2.3.
   # * Q_, Q!, q_, and q! are available since Ruby 2.1.
   # * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
-  def unpack(fmt)
-    Primitive.pack_unpack(fmt)
+  def unpack(fmt, offset: 0)
+    Primitive.pack_unpack(fmt, offset)
   end
 
   # call-seq:
   #    str.unpack1(format)    ->  obj
+  #    str.unpack1(format, offset: anInteger)    ->  obj
   #
   # Decodes <i>str</i> (which may contain binary data) according to the
   # format string, returning the first value extracted.
+  #
+  # The keyword <i>offset</i> can be given to start the decoding after skipping
+  # the specified amount of bytes:
+  #   "abc".unpack1("C*") # => 97
+  #   "abc".unpack1("C*", offset: 2) # => 99
+  #   "abc".unpack1("C*", offset: 4) # => offset outside of string (ArgumentError)
+  #
   # See also String#unpack, Array#pack.
   #
   # Contrast with String#unpack:
@@ -287,7 +304,7 @@ class String
   #
   # Thus unpack1 is convenient, makes clear the intention and signals
   # the expected return value to those reading the code.
-  def unpack1(fmt)
-    Primitive.pack_unpack1(fmt)
+  def unpack1(fmt, offset: 0)
+    Primitive.pack_unpack1(fmt, offset)
   end
 end
diff --git a/spec/ruby/core/string/unpack/shared/basic.rb b/spec/ruby/core/string/unpack/shared/basic.rb
index 332e39d8d1..f636f4689f 100644
--- a/spec/ruby/core/string/unpack/shared/basic.rb
+++ b/spec/ruby/core/string/unpack/shared/basic.rb
@@ -16,6 +16,12 @@ describe :string_unpack_basic, shared: true do
   it "raises a TypeError when passed an Integer" do
     -> { "abc".unpack(1) }.should raise_error(TypeError)
   end
+
+  ruby_version_is "3.1" do
+    it "starts unpacking from the given offset" do
+      "abc".unpack("CC", offset: 1).should == [98, 99]
+    end
+  end
 end
 
 describe :string_unpack_no_platform, shared: true do
@@ -26,4 +32,18 @@ describe :string_unpack_no_platform, shared: true do
   it "raises an ArgumentError when the format modifier is '!'" do
     -> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError)
   end
+
+  ruby_version_is "3.1" do
+    it "raises an ArgumentError when the offset is negative" do
+      -> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError)
+    end
+
+    it "returns nil if the offset is at the end of the string" do
+      "a".unpack("C", offset: 1).should == [nil]
+    end
+
+    it "raises an ArgumentError when the offset is larget than the string" do
+      -> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError)
+    end
+  end
 end
diff --git a/spec/ruby/core/string/unpack1_spec.rb b/spec/ruby/core/string/unpack1_spec.rb
index 5fe81733fb..f59bd92d6a 100644
--- a/spec/ruby/core/string/unpack1_spec.rb
+++ b/spec/ruby/core/string/unpack1_spec.rb
@@ -7,4 +7,24 @@ describe "String#unpack1" do
     "aG9nZWZ1Z2E=".unpack1("m").should == "hogefuga"
     "A".unpack1("B*").should == "01000001"
   end
+
+  ruby_version_is "3.1" do
+    it "starts unpacking from the given offset" do
+      "ZZABCD".unpack1('x3C', offset: 2).should == "ABCD".unpack('x3C')[0]
+      "ZZZZaG9nZWZ1Z2E=".unpack1("m", offset: 4).should == "hogefuga"
+      "ZA".unpack1("B*", offset: 1).should == "01000001"
+    end
+
+    it "raises an ArgumentError when the offset is negative" do
+      -> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError)
+    end
+
+    it "returns nil if the offset is at the end of the string" do
+      "a".unpack1("C", offset: 1).should == nil
+    end
+
+    it "raises an ArgumentError when the offset is larget than the string" do
+      -> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError)
+    end
+  end
 end
diff --git a/test/ruby/test_pack.rb b/test/ruby/test_pack.rb
index c9ca9c3dfd..af45adb2b2 100644
--- a/test/ruby/test_pack.rb
+++ b/test/ruby/test_pack.rb
@@ -869,4 +869,30 @@ EXPECTED
     assert_equal "hogefuga", "aG9nZWZ1Z2E=".unpack1("m")
     assert_equal "01000001", "A".unpack1("B*")
   end
+
+  def test_unpack1_offset
+    assert_equal 65, "ZA".unpack1("C", offset: 1)
+    assert_equal "01000001", "YZA".unpack1("B*", offset: 2)
+    assert_nil "abc".unpack1("C", offset: 3)
+    assert_raise_with_message(ArgumentError, /offset can't be negative/) {
+      "a".unpack1("C", offset: -1)
+    }
+    assert_raise_with_message(ArgumentError, /offset outside of string/) {
+      "a".unpack1("C", offset: 2)
+    }
+    assert_nil "a".unpack1("C", offset: 1)
+  end
+
+  def test_unpack_offset
+    assert_equal [65], "ZA".unpack("C", offset: 1)
+    assert_equal ["01000001"], "YZA".unpack("B*", offset: 2)
+    assert_equal [nil, nil, nil], "abc".unpack("CCC", offset: 3)
+    assert_raise_with_message(ArgumentError, /offset can't be negative/) {
+      "a".unpack("C", offset: -1)
+    }
+    assert_raise_with_message(ArgumentError, /offset outside of string/) {
+      "a".unpack("C", offset: 2)
+    }
+    assert_equal [nil], "a".unpack("C", offset: 1)
+  end
 end