String#unpack1 [Feature #12752]

Returns the first value of String#unpack.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56959 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2016-12-01 14:18:32 +00:00 · 2016-12-01 14:18:32 +00:00 · 306f43acfe
commit 306f43acfe
parent b6e137e93c
3 changed files with 164 additions and 125 deletions
--- a/2
+++ b/2
@ -159,6 +159,8 @@ with all sufficient information, see the ChangeLog file or Redmine
  * String#concat, String#prepend [Feature #12333]
    Now takes multiple arguments.

+  * String#unpack1 [Feature #12752]
+
 * Symbol

  * Symbol#casecmp? [Feature #12786]
--- a/pack.c
+++ b/pack.c
@ -1021,7 +1021,7 @@ hex2num(char c)
 } while (0)

 #define PACK_ITEM_ADJUST() do { \
-    if (tmp_len > 0 && !block_p) \
+    if (tmp_len > 0 && mode == UNPACK_ARRAY) \
 	rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
 } while (0)

@ -1043,128 +1043,13 @@ infected_str_new(const char *ptr, long len, VALUE str)
    return s;
 }

-/*
- *  call-seq:
- *     str.unpack(format)    ->  anArray
- *
- *  Decodes <i>str</i> (which may contain binary data) according to the
- *  format string, returning an array of each value extracted. The
- *  format string consists of a sequence of single-character directives,
- *  summarized in the table at the end of this entry.
- *  Each directive may be followed
- *  by a number, indicating the number of times to repeat with this
- *  directive. An asterisk (``<code>*</code>'') will use up all
- *  remaining elements. The directives <code>sSiIlL</code> may each be
- *  followed by an underscore (``<code>_</code>'') or
- *  exclamation mark (``<code>!</code>'') to use the underlying
- *  platform's native size for the specified type; otherwise, it uses a
- *  platform-independent consistent size. Spaces are ignored in the
- *  format string. See also <code>Array#pack</code>.
- *
- *     "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
- *     "abc \0\0".unpack('a3a3')           #=> ["abc", " \000\000"]
- *     "abc \0abc \0".unpack('Z*Z*')       #=> ["abc ", "abc "]
- *     "aa".unpack('b8B8')                 #=> ["10000110", "01100001"]
- *     "aaa".unpack('h2H2c')               #=> ["16", "61", 97]
- *     "\xfe\xff\xfe\xff".unpack('sS')     #=> [-2, 65534]
- *     "now=20is".unpack('M*')             #=> ["now is"]
- *     "whole".unpack('xax2aX2aX1aX2a')    #=> ["h", "e", "l", "l", "o"]
- *
- *  This table summarizes the various formats and the Ruby classes
- *  returned by each.
- *
- *   Integer       |         |
- *   Directive     | Returns | Meaning
- *   ------------------------------------------------------------------
- *   C             | Integer | 8-bit unsigned (unsigned char)
- *   S             | Integer | 16-bit unsigned, native endian (uint16_t)
- *   L             | Integer | 32-bit unsigned, native endian (uint32_t)
- *   Q             | Integer | 64-bit unsigned, native endian (uint64_t)
- *   J             | Integer | pointer width unsigned, native endian (uintptr_t)
- *                 |         | (J is available since Ruby 2.3.)
- *                 |         |
- *   c             | Integer | 8-bit signed (signed char)
- *   s             | Integer | 16-bit signed, native endian (int16_t)
- *   l             | Integer | 32-bit signed, native endian (int32_t)
- *   q             | Integer | 64-bit signed, native endian (int64_t)
- *   j             | Integer | pointer width signed, native endian (intptr_t)
- *                 |         | (j is available since Ruby 2.3.)
- *                 |         |
- *   S_ S!         | Integer | unsigned short, native endian
- *   I I_ I!       | Integer | unsigned int, native endian
- *   L_ L!         | Integer | unsigned long, native endian
- *   Q_ Q!         | Integer | unsigned long long, native endian (ArgumentError
- *                 |         | if the platform has no long long type.)
- *                 |         | (Q_ and Q! is available since Ruby 2.1.)
- *   J!            | Integer | uintptr_t, native endian (same with J)
- *                 |         | (J! is available since Ruby 2.3.)
- *                 |         |
- *   s_ s!         | Integer | signed short, native endian
- *   i i_ i!       | Integer | signed int, native endian
- *   l_ l!         | Integer | signed long, native endian
- *   q_ q!         | Integer | signed long long, native endian (ArgumentError
- *                 |         | if the platform has no long long type.)
- *                 |         | (q_ and q! is available since Ruby 2.1.)
- *   j!            | Integer | intptr_t, native endian (same with j)
- *                 |         | (j! is available since Ruby 2.3.)
- *                 |         |
- *   S> s> S!> s!> | Integer | same as the directives without ">" except
- *   L> l> L!> l!> |         | big endian
- *   I!> i!>       |         | (available since Ruby 1.9.3)
- *   Q> q> Q!> q!> |         | "S>" is same as "n"
- *   J> j> J!> j!> |         | "L>" is same as "N"
- *                 |         |
- *   S< s< S!< s!< | Integer | same as the directives without "<" except
- *   L< l< L!< l!< |         | little endian
- *   I!< i!<       |         | (available since Ruby 1.9.3)
- *   Q< q< Q!< q!< |         | "S<" is same as "v"
- *   J< j< J!< j!< |         | "L<" is same as "V"
- *                 |         |
- *   n             | Integer | 16-bit unsigned, network (big-endian) byte order
- *   N             | Integer | 32-bit unsigned, network (big-endian) byte order
- *   v             | Integer | 16-bit unsigned, VAX (little-endian) byte order
- *   V             | Integer | 32-bit unsigned, VAX (little-endian) byte order
- *                 |         |
- *   U             | Integer | UTF-8 character
- *   w             | Integer | BER-compressed integer (see Array.pack)
- *
- *   Float        |         |
- *   Directive    | Returns | Meaning
- *   -----------------------------------------------------------------
- *   D d          | Float   | double-precision, native format
- *   F f          | Float   | single-precision, native format
- *   E            | Float   | double-precision, little-endian byte order
- *   e            | Float   | single-precision, little-endian byte order
- *   G            | Float   | double-precision, network (big-endian) byte order
- *   g            | Float   | single-precision, network (big-endian) byte order
- *
- *   String       |         |
- *   Directive    | Returns | Meaning
- *   -----------------------------------------------------------------
- *   A            | String  | arbitrary binary string (remove trailing nulls and ASCII spaces)
- *   a            | String  | arbitrary binary string
- *   Z            | String  | null-terminated string
- *   B            | String  | bit string (MSB first)
- *   b            | String  | bit string (LSB first)
- *   H            | String  | hex string (high nibble first)
- *   h            | String  | hex string (low nibble first)
- *   u            | String  | UU-encoded string
- *   M            | String  | quoted-printable, MIME encoding (see RFC2045)
- *   m            | String  | base64 encoded string (RFC 2045) (default)
- *                |         | base64 encoded string (RFC 4648) if followed by 0
- *   P            | String  | pointer to a structure (fixed-length string)
- *   p            | String  | pointer to a null-terminated string
- *
- *   Misc.        |         |
- *   Directive    | Returns | Meaning
- *   -----------------------------------------------------------------
- *   @            | ---     | skip to the offset given by the length argument
- *   X            | ---     | skip backward one byte
- *   x            | ---     | skip forward one byte
- */
+/* unpack mode */
+#define UNPACK_ARRAY 0
+#define UNPACK_BLOCK 1
+#define UNPACK_1 2

 static VALUE
-pack_unpack(VALUE str, VALUE fmt)
+pack_unpack_internal(VALUE str, VALUE fmt, int mode)
 {
 #define hexdigits ruby_hexdigits
    char *s, *send;
@ -1177,16 +1062,18 @@ pack_unpack(VALUE str, VALUE fmt)
 #ifdef NATINT_PACK
    int natint;			/* native integer */
 #endif
-    int block_p = rb_block_given_p();
    int signed_p, integer_size, bigendian_p;
 #define UNPACK_PUSH(item) do {\
 	VALUE item_val = (item);\
-	if (block_p) {\
+	if ((mode) == UNPACK_BLOCK) {\
 	    rb_yield(item_val);\
 	}\
-	else {\
+	else if ((mode) == UNPACK_ARRAY) {\
 	    rb_ary_push(ary, item_val);\
 	}\
+	else /* if ((mode) == UNPACK_1) { */ {\
+	    return item_val; \
+	}\
    } while (0)

    StringValue(str);
@ -1196,7 +1083,7 @@ pack_unpack(VALUE str, VALUE fmt)
    p = RSTRING_PTR(fmt);
    pend = p + RSTRING_LEN(fmt);

-    ary = block_p ? Qnil : rb_ary_new();
+    ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
    while (p < pend) {
 	int explicit_endian = 0;
 	type = *p++;
@ -1868,6 +1755,148 @@ pack_unpack(VALUE str, VALUE fmt)
    return ary;
 }

+/*
+ *  call-seq:
+ *     str.unpack(format)    ->  anArray
+ *
+ *  Decodes <i>str</i> (which may contain binary data) according to the
+ *  format string, returning an array of each value extracted. The
+ *  format string consists of a sequence of single-character directives,
+ *  summarized in the table at the end of this entry.
+ *  Each directive may be followed
+ *  by a number, indicating the number of times to repeat with this
+ *  directive. An asterisk (``<code>*</code>'') will use up all
+ *  remaining elements. The directives <code>sSiIlL</code> may each be
+ *  followed by an underscore (``<code>_</code>'') or
+ *  exclamation mark (``<code>!</code>'') to use the underlying
+ *  platform's native size for the specified type; otherwise, it uses a
+ *  platform-independent consistent size. Spaces are ignored in the
+ *  format string. See also <code>String#unpack1</code>, <code>Array#pack</code>.
+ *
+ *     "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
+ *     "abc \0\0".unpack('a3a3')           #=> ["abc", " \000\000"]
+ *     "abc \0abc \0".unpack('Z*Z*')       #=> ["abc ", "abc "]
+ *     "aa".unpack('b8B8')                 #=> ["10000110", "01100001"]
+ *     "aaa".unpack('h2H2c')               #=> ["16", "61", 97]
+ *     "\xfe\xff\xfe\xff".unpack('sS')     #=> [-2, 65534]
+ *     "now=20is".unpack('M*')             #=> ["now is"]
+ *     "whole".unpack('xax2aX2aX1aX2a')    #=> ["h", "e", "l", "l", "o"]
+ *
+ *  This table summarizes the various formats and the Ruby classes
+ *  returned by each.
+ *
+ *   Integer       |         |
+ *   Directive     | Returns | Meaning
+ *   ------------------------------------------------------------------
+ *   C             | Integer | 8-bit unsigned (unsigned char)
+ *   S             | Integer | 16-bit unsigned, native endian (uint16_t)
+ *   L             | Integer | 32-bit unsigned, native endian (uint32_t)
+ *   Q             | Integer | 64-bit unsigned, native endian (uint64_t)
+ *   J             | Integer | pointer width unsigned, native endian (uintptr_t)
+ *                 |         |
+ *   c             | Integer | 8-bit signed (signed char)
+ *   s             | Integer | 16-bit signed, native endian (int16_t)
+ *   l             | Integer | 32-bit signed, native endian (int32_t)
+ *   q             | Integer | 64-bit signed, native endian (int64_t)
+ *   j             | Integer | pointer width signed, native endian (intptr_t)
+ *                 |         |
+ *   S_ S!         | Integer | unsigned short, native endian
+ *   I I_ I!       | Integer | unsigned int, native endian
+ *   L_ L!         | Integer | unsigned long, native endian
+ *   Q_ Q!         | Integer | unsigned long long, native endian (ArgumentError
+ *                 |         | if the platform has no long long type.)
+ *   J!            | Integer | uintptr_t, native endian (same as J)
+ *                 |         |
+ *   s_ s!         | Integer | signed short, native endian
+ *   i i_ i!       | Integer | signed int, native endian
+ *   l_ l!         | Integer | signed long, native endian
+ *   q_ q!         | Integer | signed long long, native endian (ArgumentError
+ *                 |         | if the platform has no long long type.)
+ *   j!            | Integer | intptr_t, native endian (same as j)
+ *                 |         |
+ *   S> s> S!> s!> | Integer | same as the directives without ">" except
+ *   L> l> L!> l!> |         | big endian
+ *   I!> i!>       |         |
+ *   Q> q> Q!> q!> |         | "S>" is same as "n"
+ *   J> j> J!> j!> |         | "L>" is same as "N"
+ *                 |         |
+ *   S< s< S!< s!< | Integer | same as the directives without "<" except
+ *   L< l< L!< l!< |         | little endian
+ *   I!< i!<       |         |
+ *   Q< q< Q!< q!< |         | "S<" is same as "v"
+ *   J< j< J!< j!< |         | "L<" is same as "V"
+ *                 |         |
+ *   n             | Integer | 16-bit unsigned, network (big-endian) byte order
+ *   N             | Integer | 32-bit unsigned, network (big-endian) byte order
+ *   v             | Integer | 16-bit unsigned, VAX (little-endian) byte order
+ *   V             | Integer | 32-bit unsigned, VAX (little-endian) byte order
+ *                 |         |
+ *   U             | Integer | UTF-8 character
+ *   w             | Integer | BER-compressed integer (see Array.pack)
+ *
+ *   Float        |         |
+ *   Directive    | Returns | Meaning
+ *   -----------------------------------------------------------------
+ *   D d          | Float   | double-precision, native format
+ *   F f          | Float   | single-precision, native format
+ *   E            | Float   | double-precision, little-endian byte order
+ *   e            | Float   | single-precision, little-endian byte order
+ *   G            | Float   | double-precision, network (big-endian) byte order
+ *   g            | Float   | single-precision, network (big-endian) byte order
+ *
+ *   String       |         |
+ *   Directive    | Returns | Meaning
+ *   -----------------------------------------------------------------
+ *   A            | String  | arbitrary binary string (remove trailing nulls and ASCII spaces)
+ *   a            | String  | arbitrary binary string
+ *   Z            | String  | null-terminated string
+ *   B            | String  | bit string (MSB first)
+ *   b            | String  | bit string (LSB first)
+ *   H            | String  | hex string (high nibble first)
+ *   h            | String  | hex string (low nibble first)
+ *   u            | String  | UU-encoded string
+ *   M            | String  | quoted-printable, MIME encoding (see RFC2045)
+ *   m            | String  | base64 encoded string (RFC 2045) (default)
+ *                |         | base64 encoded string (RFC 4648) if followed by 0
+ *   P            | String  | pointer to a structure (fixed-length string)
+ *   p            | String  | pointer to a null-terminated string
+ *
+ *   Misc.        |         |
+ *   Directive    | Returns | Meaning
+ *   -----------------------------------------------------------------
+ *   @            | ---     | skip to the offset given by the length argument
+ *   X            | ---     | skip backward one byte
+ *   x            | ---     | skip forward one byte
+ *
+ *  HISTORY
+ *
+ *  * J, J!, j, and j! are available since Ruby 2.3.
+ *  * Q_, Q!, q_, and q! are available since Ruby 2.1.
+ *  * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
+ */
+
+static VALUE
+pack_unpack(VALUE str, VALUE fmt)
+{
+    int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
+    return pack_unpack_internal(str, fmt, mode);
+}
+
+/*
+ *  call-seq:
+ *     str.unpack1(format)    ->  obj
+ *
+ *  Decodes <i>str</i> (which may contain binary data) according to the
+ *  format string, returning the first value extracted.
+ *  See also <code>String#unpack</code>, <code>Array#pack</code>.
+ */
+
+static VALUE
+pack_unpack1(VALUE str, VALUE fmt)
+{
+    return pack_unpack_internal(str, fmt, UNPACK_1);
+}
+
 int
 rb_uv_to_utf8(char buf[6], unsigned long uv)
 {
@ -1980,6 +2009,7 @@ Init_pack(void)
 {
    rb_define_method(rb_cArray, "pack", pack_pack, -1);
    rb_define_method(rb_cString, "unpack", pack_unpack, 1);
+    rb_define_method(rb_cString, "unpack1", pack_unpack1, 1);

    id_associated = rb_make_internal_id();
 }
--- a/test/ruby/test_pack.rb
+++ b/test/ruby/test_pack.rb
@ -837,4 +837,11 @@ EXPECTED

    assert_equal addr, [buf].pack('p')
  end
+
+  def test_unpack1
+    assert_equal 65, "A".unpack1("C")
+    assert_equal 68, "ABCD".unpack1("x3C")
+    assert_equal 0x3042, "\u{3042 3044 3046}".unpack1("U*")
+    assert_equal "hogefuga", "aG9nZWZ1Z2E=".unpack1("m")
+  end
 end