1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

String#unpack1 [Feature #12752]

Returns the first value of String#unpack.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56959 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2016-12-01 14:18:32 +00:00
parent b6e137e93c
commit 306f43acfe
3 changed files with 164 additions and 125 deletions

2
NEWS
View file

@ -159,6 +159,8 @@ with all sufficient information, see the ChangeLog file or Redmine
* String#concat, String#prepend [Feature #12333]
Now takes multiple arguments.
* String#unpack1 [Feature #12752]
* Symbol
* Symbol#casecmp? [Feature #12786]

280
pack.c
View file

@ -1021,7 +1021,7 @@ hex2num(char c)
} while (0)
/*
 * Runs only when tmp_len > 0 and results are being collected into `ary`
 * (array mode, no block): stores Qnil at index RARRAY_LEN(ary)+tmp_len-1.
 * NOTE(review): presumably tmp_len is the number of requested items the
 * input could not supply, so this pads `ary` with nils -- confirm where
 * tmp_len is set.
 */
#define PACK_ITEM_ADJUST() do { \
if (tmp_len > 0 && !block_p) \
if (tmp_len > 0 && mode == UNPACK_ARRAY) \
rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
} while (0)
@ -1043,128 +1043,13 @@ infected_str_new(const char *ptr, long len, VALUE str)
return s;
}
/*
* call-seq:
* str.unpack(format) -> anArray
*
* Decodes <i>str</i> (which may contain binary data) according to the
* format string, returning an array of each value extracted. The
* format string consists of a sequence of single-character directives,
* summarized in the table at the end of this entry.
* Each directive may be followed
* by a number, indicating the number of times to repeat with this
* directive. An asterisk (``<code>*</code>'') will use up all
* remaining elements. The directives <code>sSiIlL</code> may each be
* followed by an underscore (``<code>_</code>'') or
* exclamation mark (``<code>!</code>'') to use the underlying
* platform's native size for the specified type; otherwise, it uses a
* platform-independent consistent size. Spaces are ignored in the
* format string. See also <code>Array#pack</code>.
*
* "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
* "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
* "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
* "aa".unpack('b8B8') #=> ["10000110", "01100001"]
* "aaa".unpack('h2H2c') #=> ["16", "61", 97]
* "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
* "now=20is".unpack('M*') #=> ["now is"]
* "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
*
* This table summarizes the various formats and the Ruby classes
* returned by each.
*
* Integer | |
* Directive | Returns | Meaning
* ------------------------------------------------------------------
* C | Integer | 8-bit unsigned (unsigned char)
* S | Integer | 16-bit unsigned, native endian (uint16_t)
* L | Integer | 32-bit unsigned, native endian (uint32_t)
* Q | Integer | 64-bit unsigned, native endian (uint64_t)
* J | Integer | pointer width unsigned, native endian (uintptr_t)
* | | (J is available since Ruby 2.3.)
* | |
* c | Integer | 8-bit signed (signed char)
* s | Integer | 16-bit signed, native endian (int16_t)
* l | Integer | 32-bit signed, native endian (int32_t)
* q | Integer | 64-bit signed, native endian (int64_t)
* j | Integer | pointer width signed, native endian (intptr_t)
* | | (j is available since Ruby 2.3.)
* | |
* S_ S! | Integer | unsigned short, native endian
* I I_ I! | Integer | unsigned int, native endian
* L_ L! | Integer | unsigned long, native endian
* Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
* | | if the platform has no long long type.)
* | | (Q_ and Q! is available since Ruby 2.1.)
* J! | Integer | uintptr_t, native endian (same with J)
* | | (J! is available since Ruby 2.3.)
* | |
* s_ s! | Integer | signed short, native endian
* i i_ i! | Integer | signed int, native endian
* l_ l! | Integer | signed long, native endian
* q_ q! | Integer | signed long long, native endian (ArgumentError
* | | if the platform has no long long type.)
* | | (q_ and q! is available since Ruby 2.1.)
* j! | Integer | intptr_t, native endian (same with j)
* | | (j! is available since Ruby 2.3.)
* | |
* S> s> S!> s!> | Integer | same as the directives without ">" except
* L> l> L!> l!> | | big endian
* I!> i!> | | (available since Ruby 1.9.3)
* Q> q> Q!> q!> | | "S>" is same as "n"
* J> j> J!> j!> | | "L>" is same as "N"
* | |
* S< s< S!< s!< | Integer | same as the directives without "<" except
* L< l< L!< l!< | | little endian
* I!< i!< | | (available since Ruby 1.9.3)
* Q< q< Q!< q!< | | "S<" is same as "v"
* J< j< J!< j!< | | "L<" is same as "V"
* | |
* n | Integer | 16-bit unsigned, network (big-endian) byte order
* N | Integer | 32-bit unsigned, network (big-endian) byte order
* v | Integer | 16-bit unsigned, VAX (little-endian) byte order
* V | Integer | 32-bit unsigned, VAX (little-endian) byte order
* | |
* U | Integer | UTF-8 character
* w | Integer | BER-compressed integer (see Array.pack)
*
* Float | |
* Directive | Returns | Meaning
* -----------------------------------------------------------------
* D d | Float | double-precision, native format
* F f | Float | single-precision, native format
* E | Float | double-precision, little-endian byte order
* e | Float | single-precision, little-endian byte order
* G | Float | double-precision, network (big-endian) byte order
* g | Float | single-precision, network (big-endian) byte order
*
* String | |
* Directive | Returns | Meaning
* -----------------------------------------------------------------
* A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
* a | String | arbitrary binary string
* Z | String | null-terminated string
* B | String | bit string (MSB first)
* b | String | bit string (LSB first)
* H | String | hex string (high nibble first)
* h | String | hex string (low nibble first)
* u | String | UU-encoded string
* M | String | quoted-printable, MIME encoding (see RFC2045)
* m | String | base64 encoded string (RFC 2045) (default)
* | | base64 encoded string (RFC 4648) if followed by 0
* P | String | pointer to a structure (fixed-length string)
* p | String | pointer to a null-terminated string
*
* Misc. | |
* Directive | Returns | Meaning
* -----------------------------------------------------------------
* @ | --- | skip to the offset given by the length argument
* X | --- | skip backward one byte
* x | --- | skip forward one byte
*/
/*
 * unpack mode: tells pack_unpack_internal what to do with each decoded
 * item (see UNPACK_PUSH).
 */
#define UNPACK_ARRAY 0 /* push each item onto the result array */
#define UNPACK_BLOCK 1 /* yield each item to the given block */
#define UNPACK_1 2 /* return the first item immediately */
static VALUE
pack_unpack(VALUE str, VALUE fmt)
pack_unpack_internal(VALUE str, VALUE fmt, int mode)
{
#define hexdigits ruby_hexdigits
char *s, *send;
@ -1177,16 +1062,18 @@ pack_unpack(VALUE str, VALUE fmt)
#ifdef NATINT_PACK
int natint; /* native integer */
#endif
int block_p = rb_block_given_p();
int signed_p, integer_size, bigendian_p;
#define UNPACK_PUSH(item) do {\
VALUE item_val = (item);\
if (block_p) {\
if ((mode) == UNPACK_BLOCK) {\
rb_yield(item_val);\
}\
else {\
else if ((mode) == UNPACK_ARRAY) {\
rb_ary_push(ary, item_val);\
}\
else /* if ((mode) == UNPACK_1) { */ {\
return item_val; \
}\
} while (0)
StringValue(str);
@ -1196,7 +1083,7 @@ pack_unpack(VALUE str, VALUE fmt)
p = RSTRING_PTR(fmt);
pend = p + RSTRING_LEN(fmt);
ary = block_p ? Qnil : rb_ary_new();
ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
while (p < pend) {
int explicit_endian = 0;
type = *p++;
@ -1868,6 +1755,148 @@ pack_unpack(VALUE str, VALUE fmt)
return ary;
}
/*
* call-seq:
* str.unpack(format) -> anArray
*
* Decodes <i>str</i> (which may contain binary data) according to the
* format string, returning an array of each value extracted. The
* format string consists of a sequence of single-character directives,
* summarized in the table at the end of this entry.
* Each directive may be followed
* by a number, indicating the number of times to repeat with this
* directive. An asterisk (``<code>*</code>'') will use up all
* remaining elements. The directives <code>sSiIlL</code> may each be
* followed by an underscore (``<code>_</code>'') or
* exclamation mark (``<code>!</code>'') to use the underlying
* platform's native size for the specified type; otherwise, it uses a
* platform-independent consistent size. Spaces are ignored in the
* format string. See also <code>String#unpack1</code>, <code>Array#pack</code>.
*
* "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
* "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
* "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
* "aa".unpack('b8B8') #=> ["10000110", "01100001"]
* "aaa".unpack('h2H2c') #=> ["16", "61", 97]
* "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
* "now=20is".unpack('M*') #=> ["now is"]
* "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
*
* This table summarizes the various formats and the Ruby classes
* returned by each.
*
* Integer | |
* Directive | Returns | Meaning
* ------------------------------------------------------------------
* C | Integer | 8-bit unsigned (unsigned char)
* S | Integer | 16-bit unsigned, native endian (uint16_t)
* L | Integer | 32-bit unsigned, native endian (uint32_t)
* Q | Integer | 64-bit unsigned, native endian (uint64_t)
* J | Integer | pointer width unsigned, native endian (uintptr_t)
* | |
* c | Integer | 8-bit signed (signed char)
* s | Integer | 16-bit signed, native endian (int16_t)
* l | Integer | 32-bit signed, native endian (int32_t)
* q | Integer | 64-bit signed, native endian (int64_t)
* j | Integer | pointer width signed, native endian (intptr_t)
* | |
* S_ S! | Integer | unsigned short, native endian
* I I_ I! | Integer | unsigned int, native endian
* L_ L! | Integer | unsigned long, native endian
* Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
* | | if the platform has no long long type.)
* J! | Integer | uintptr_t, native endian (same as J)
* | |
* s_ s! | Integer | signed short, native endian
* i i_ i! | Integer | signed int, native endian
* l_ l! | Integer | signed long, native endian
* q_ q! | Integer | signed long long, native endian (ArgumentError
* | | if the platform has no long long type.)
* j! | Integer | intptr_t, native endian (same as j)
* | |
* S> s> S!> s!> | Integer | same as the directives without ">" except
* L> l> L!> l!> | | big endian
* I!> i!> | |
* Q> q> Q!> q!> | | "S>" is same as "n"
* J> j> J!> j!> | | "L>" is same as "N"
* | |
* S< s< S!< s!< | Integer | same as the directives without "<" except
* L< l< L!< l!< | | little endian
* I!< i!< | |
* Q< q< Q!< q!< | | "S<" is same as "v"
* J< j< J!< j!< | | "L<" is same as "V"
* | |
* n | Integer | 16-bit unsigned, network (big-endian) byte order
* N | Integer | 32-bit unsigned, network (big-endian) byte order
* v | Integer | 16-bit unsigned, VAX (little-endian) byte order
* V | Integer | 32-bit unsigned, VAX (little-endian) byte order
* | |
* U | Integer | UTF-8 character
* w | Integer | BER-compressed integer (see Array#pack)
*
* Float | |
* Directive | Returns | Meaning
* -----------------------------------------------------------------
* D d | Float | double-precision, native format
* F f | Float | single-precision, native format
* E | Float | double-precision, little-endian byte order
* e | Float | single-precision, little-endian byte order
* G | Float | double-precision, network (big-endian) byte order
* g | Float | single-precision, network (big-endian) byte order
*
* String | |
* Directive | Returns | Meaning
* -----------------------------------------------------------------
* A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
* a | String | arbitrary binary string
* Z | String | null-terminated string
* B | String | bit string (MSB first)
* b | String | bit string (LSB first)
* H | String | hex string (high nibble first)
* h | String | hex string (low nibble first)
* u | String | UU-encoded string
* M | String | quoted-printable, MIME encoding (see RFC2045)
* m | String | base64 encoded string (RFC 2045) (default)
* | | base64 encoded string (RFC 4648) if followed by 0
* P | String | pointer to a structure (fixed-length string)
* p | String | pointer to a null-terminated string
*
* Misc. | |
* Directive | Returns | Meaning
* -----------------------------------------------------------------
* @ | --- | skip to the offset given by the length argument
* X | --- | skip backward one byte
* x | --- | skip forward one byte
*
* HISTORY
*
* * J, J!, j, and j! are available since Ruby 2.3.
* * Q_, Q!, q_, and q! are available since Ruby 2.1.
* * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
*/
static VALUE
pack_unpack(VALUE str, VALUE fmt)
{
    /* With a block, each decoded value is yielded to it; without one,
     * the values are collected into an array. */
    if (rb_block_given_p()) {
        return pack_unpack_internal(str, fmt, UNPACK_BLOCK);
    }
    return pack_unpack_internal(str, fmt, UNPACK_ARRAY);
}
/*
* call-seq:
* str.unpack1(format) -> obj
*
* Decodes <i>str</i> (which may contain binary data) according to the
* format string, returning the first value extracted.
* Decoding stops as soon as that first value has been produced, and no
* result array is built. The format directives are the ones described
* at <code>String#unpack</code>.
*
* "A".unpack1("C") #=> 65
* "ABCD".unpack1("x3C") #=> 68
* "aG9nZWZ1Z2E=".unpack1("m") #=> "hogefuga"
*
* See also <code>String#unpack</code>, <code>Array#pack</code>.
*/
static VALUE
pack_unpack1(VALUE str, VALUE fmt)
{
return pack_unpack_internal(str, fmt, UNPACK_1);
}
int
rb_uv_to_utf8(char buf[6], unsigned long uv)
{
@ -1980,6 +2009,7 @@ Init_pack(void)
{
rb_define_method(rb_cArray, "pack", pack_pack, -1);
rb_define_method(rb_cString, "unpack", pack_unpack, 1);
rb_define_method(rb_cString, "unpack1", pack_unpack1, 1);
id_associated = rb_make_internal_id();
}

View file

@ -837,4 +837,11 @@ EXPECTED
assert_equal addr, [buf].pack('p')
end
def test_unpack1
  # Each row is [expected first value, packed input, format string].
  # String#unpack1 must return only the first decoded item, even when the
  # format (e.g. "U*") could decode several.
  cases = [
    [65, "A", "C"],
    [68, "ABCD", "x3C"],
    [0x3042, "\u{3042 3044 3046}", "U*"],
    ["hogefuga", "aG9nZWZ1Z2E=", "m"],
  ]
  cases.each do |expected, packed, format|
    assert_equal expected, packed.unpack1(format)
  end
end
end