mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
String#unpack1 [Feature #12752]
Returns the first value of String#unpack. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56959 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
b6e137e93c
commit
306f43acfe
3 changed files with 164 additions and 125 deletions
2
NEWS
2
NEWS
|
@ -159,6 +159,8 @@ with all sufficient information, see the ChangeLog file or Redmine
|
|||
* String#concat, String#prepend [Feature #12333]
|
||||
Now takes multiple arguments.
|
||||
|
||||
* String#unpack1 [Feature #12752]
|
||||
|
||||
* Symbol
|
||||
|
||||
* Symbol#casecmp? [Feature #12786]
|
||||
|
|
280
pack.c
280
pack.c
|
@ -1021,7 +1021,7 @@ hex2num(char c)
|
|||
} while (0)
|
||||
|
||||
#define PACK_ITEM_ADJUST() do { \
|
||||
if (tmp_len > 0 && !block_p) \
|
||||
if (tmp_len > 0 && mode == UNPACK_ARRAY) \
|
||||
rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
|
||||
} while (0)
|
||||
|
||||
|
@ -1043,128 +1043,13 @@ infected_str_new(const char *ptr, long len, VALUE str)
|
|||
return s;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* str.unpack(format) -> anArray
|
||||
*
|
||||
* Decodes <i>str</i> (which may contain binary data) according to the
|
||||
* format string, returning an array of each value extracted. The
|
||||
* format string consists of a sequence of single-character directives,
|
||||
* summarized in the table at the end of this entry.
|
||||
* Each directive may be followed
|
||||
* by a number, indicating the number of times to repeat with this
|
||||
* directive. An asterisk (``<code>*</code>'') will use up all
|
||||
* remaining elements. The directives <code>sSiIlL</code> may each be
|
||||
* followed by an underscore (``<code>_</code>'') or
|
||||
* exclamation mark (``<code>!</code>'') to use the underlying
|
||||
* platform's native size for the specified type; otherwise, it uses a
|
||||
* platform-independent consistent size. Spaces are ignored in the
|
||||
* format string. See also <code>Array#pack</code>.
|
||||
*
|
||||
* "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
|
||||
* "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
|
||||
* "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
|
||||
* "aa".unpack('b8B8') #=> ["10000110", "01100001"]
|
||||
* "aaa".unpack('h2H2c') #=> ["16", "61", 97]
|
||||
* "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
|
||||
* "now=20is".unpack('M*') #=> ["now is"]
|
||||
* "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
|
||||
*
|
||||
* This table summarizes the various formats and the Ruby classes
|
||||
* returned by each.
|
||||
*
|
||||
* Integer | |
|
||||
* Directive | Returns | Meaning
|
||||
* ------------------------------------------------------------------
|
||||
* C | Integer | 8-bit unsigned (unsigned char)
|
||||
* S | Integer | 16-bit unsigned, native endian (uint16_t)
|
||||
* L | Integer | 32-bit unsigned, native endian (uint32_t)
|
||||
* Q | Integer | 64-bit unsigned, native endian (uint64_t)
|
||||
* J | Integer | pointer width unsigned, native endian (uintptr_t)
|
||||
* | | (J is available since Ruby 2.3.)
|
||||
* | |
|
||||
* c | Integer | 8-bit signed (signed char)
|
||||
* s | Integer | 16-bit signed, native endian (int16_t)
|
||||
* l | Integer | 32-bit signed, native endian (int32_t)
|
||||
* q | Integer | 64-bit signed, native endian (int64_t)
|
||||
* j | Integer | pointer width signed, native endian (intptr_t)
|
||||
* | | (j is available since Ruby 2.3.)
|
||||
* | |
|
||||
* S_ S! | Integer | unsigned short, native endian
|
||||
* I I_ I! | Integer | unsigned int, native endian
|
||||
* L_ L! | Integer | unsigned long, native endian
|
||||
* Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
|
||||
* | | if the platform has no long long type.)
|
||||
* | | (Q_ and Q! is available since Ruby 2.1.)
|
||||
* J! | Integer | uintptr_t, native endian (same with J)
|
||||
* | | (J! is available since Ruby 2.3.)
|
||||
* | |
|
||||
* s_ s! | Integer | signed short, native endian
|
||||
* i i_ i! | Integer | signed int, native endian
|
||||
* l_ l! | Integer | signed long, native endian
|
||||
* q_ q! | Integer | signed long long, native endian (ArgumentError
|
||||
* | | if the platform has no long long type.)
|
||||
* | | (q_ and q! is available since Ruby 2.1.)
|
||||
* j! | Integer | intptr_t, native endian (same with j)
|
||||
* | | (j! is available since Ruby 2.3.)
|
||||
* | |
|
||||
* S> s> S!> s!> | Integer | same as the directives without ">" except
|
||||
* L> l> L!> l!> | | big endian
|
||||
* I!> i!> | | (available since Ruby 1.9.3)
|
||||
* Q> q> Q!> q!> | | "S>" is same as "n"
|
||||
* J> j> J!> j!> | | "L>" is same as "N"
|
||||
* | |
|
||||
* S< s< S!< s!< | Integer | same as the directives without "<" except
|
||||
* L< l< L!< l!< | | little endian
|
||||
* I!< i!< | | (available since Ruby 1.9.3)
|
||||
* Q< q< Q!< q!< | | "S<" is same as "v"
|
||||
* J< j< J!< j!< | | "L<" is same as "V"
|
||||
* | |
|
||||
* n | Integer | 16-bit unsigned, network (big-endian) byte order
|
||||
* N | Integer | 32-bit unsigned, network (big-endian) byte order
|
||||
* v | Integer | 16-bit unsigned, VAX (little-endian) byte order
|
||||
* V | Integer | 32-bit unsigned, VAX (little-endian) byte order
|
||||
* | |
|
||||
* U | Integer | UTF-8 character
|
||||
* w | Integer | BER-compressed integer (see Array.pack)
|
||||
*
|
||||
* Float | |
|
||||
* Directive | Returns | Meaning
|
||||
* -----------------------------------------------------------------
|
||||
* D d | Float | double-precision, native format
|
||||
* F f | Float | single-precision, native format
|
||||
* E | Float | double-precision, little-endian byte order
|
||||
* e | Float | single-precision, little-endian byte order
|
||||
* G | Float | double-precision, network (big-endian) byte order
|
||||
* g | Float | single-precision, network (big-endian) byte order
|
||||
*
|
||||
* String | |
|
||||
* Directive | Returns | Meaning
|
||||
* -----------------------------------------------------------------
|
||||
* A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
|
||||
* a | String | arbitrary binary string
|
||||
* Z | String | null-terminated string
|
||||
* B | String | bit string (MSB first)
|
||||
* b | String | bit string (LSB first)
|
||||
* H | String | hex string (high nibble first)
|
||||
* h | String | hex string (low nibble first)
|
||||
* u | String | UU-encoded string
|
||||
* M | String | quoted-printable, MIME encoding (see RFC2045)
|
||||
* m | String | base64 encoded string (RFC 2045) (default)
|
||||
* | | base64 encoded string (RFC 4648) if followed by 0
|
||||
* P | String | pointer to a structure (fixed-length string)
|
||||
* p | String | pointer to a null-terminated string
|
||||
*
|
||||
* Misc. | |
|
||||
* Directive | Returns | Meaning
|
||||
* -----------------------------------------------------------------
|
||||
* @ | --- | skip to the offset given by the length argument
|
||||
* X | --- | skip backward one byte
|
||||
* x | --- | skip forward one byte
|
||||
*/
|
||||
/* unpack mode */
|
||||
#define UNPACK_ARRAY 0
|
||||
#define UNPACK_BLOCK 1
|
||||
#define UNPACK_1 2
|
||||
|
||||
static VALUE
|
||||
pack_unpack(VALUE str, VALUE fmt)
|
||||
pack_unpack_internal(VALUE str, VALUE fmt, int mode)
|
||||
{
|
||||
#define hexdigits ruby_hexdigits
|
||||
char *s, *send;
|
||||
|
@ -1177,16 +1062,18 @@ pack_unpack(VALUE str, VALUE fmt)
|
|||
#ifdef NATINT_PACK
|
||||
int natint; /* native integer */
|
||||
#endif
|
||||
int block_p = rb_block_given_p();
|
||||
int signed_p, integer_size, bigendian_p;
|
||||
#define UNPACK_PUSH(item) do {\
|
||||
VALUE item_val = (item);\
|
||||
if (block_p) {\
|
||||
if ((mode) == UNPACK_BLOCK) {\
|
||||
rb_yield(item_val);\
|
||||
}\
|
||||
else {\
|
||||
else if ((mode) == UNPACK_ARRAY) {\
|
||||
rb_ary_push(ary, item_val);\
|
||||
}\
|
||||
else /* if ((mode) == UNPACK_1) { */ {\
|
||||
return item_val; \
|
||||
}\
|
||||
} while (0)
|
||||
|
||||
StringValue(str);
|
||||
|
@ -1196,7 +1083,7 @@ pack_unpack(VALUE str, VALUE fmt)
|
|||
p = RSTRING_PTR(fmt);
|
||||
pend = p + RSTRING_LEN(fmt);
|
||||
|
||||
ary = block_p ? Qnil : rb_ary_new();
|
||||
ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
|
||||
while (p < pend) {
|
||||
int explicit_endian = 0;
|
||||
type = *p++;
|
||||
|
@ -1868,6 +1755,148 @@ pack_unpack(VALUE str, VALUE fmt)
|
|||
return ary;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* str.unpack(format) -> anArray
|
||||
*
|
||||
* Decodes <i>str</i> (which may contain binary data) according to the
|
||||
* format string, returning an array of each value extracted. The
|
||||
* format string consists of a sequence of single-character directives,
|
||||
* summarized in the table at the end of this entry.
|
||||
* Each directive may be followed
|
||||
* by a number, indicating the number of times to repeat with this
|
||||
* directive. An asterisk (``<code>*</code>'') will use up all
|
||||
* remaining elements. The directives <code>sSiIlL</code> may each be
|
||||
* followed by an underscore (``<code>_</code>'') or
|
||||
* exclamation mark (``<code>!</code>'') to use the underlying
|
||||
* platform's native size for the specified type; otherwise, it uses a
|
||||
* platform-independent consistent size. Spaces are ignored in the
|
||||
* format string. See also <code>String#unpack1</code>, <code>Array#pack</code>.
|
||||
*
|
||||
* "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
|
||||
* "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
|
||||
* "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
|
||||
* "aa".unpack('b8B8') #=> ["10000110", "01100001"]
|
||||
* "aaa".unpack('h2H2c') #=> ["16", "61", 97]
|
||||
* "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
|
||||
* "now=20is".unpack('M*') #=> ["now is"]
|
||||
* "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
|
||||
*
|
||||
* This table summarizes the various formats and the Ruby classes
|
||||
* returned by each.
|
||||
*
|
||||
* Integer | |
|
||||
* Directive | Returns | Meaning
|
||||
* ------------------------------------------------------------------
|
||||
* C | Integer | 8-bit unsigned (unsigned char)
|
||||
* S | Integer | 16-bit unsigned, native endian (uint16_t)
|
||||
* L | Integer | 32-bit unsigned, native endian (uint32_t)
|
||||
* Q | Integer | 64-bit unsigned, native endian (uint64_t)
|
||||
* J | Integer | pointer width unsigned, native endian (uintptr_t)
|
||||
* | |
|
||||
* c | Integer | 8-bit signed (signed char)
|
||||
* s | Integer | 16-bit signed, native endian (int16_t)
|
||||
* l | Integer | 32-bit signed, native endian (int32_t)
|
||||
* q | Integer | 64-bit signed, native endian (int64_t)
|
||||
* j | Integer | pointer width signed, native endian (intptr_t)
|
||||
* | |
|
||||
* S_ S! | Integer | unsigned short, native endian
|
||||
* I I_ I! | Integer | unsigned int, native endian
|
||||
* L_ L! | Integer | unsigned long, native endian
|
||||
* Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
|
||||
* | | if the platform has no long long type.)
|
||||
* J! | Integer | uintptr_t, native endian (same as J)
|
||||
* | |
|
||||
* s_ s! | Integer | signed short, native endian
|
||||
* i i_ i! | Integer | signed int, native endian
|
||||
* l_ l! | Integer | signed long, native endian
|
||||
* q_ q! | Integer | signed long long, native endian (ArgumentError
|
||||
* | | if the platform has no long long type.)
|
||||
* j! | Integer | intptr_t, native endian (same as j)
|
||||
* | |
|
||||
* S> s> S!> s!> | Integer | same as the directives without ">" except
|
||||
* L> l> L!> l!> | | big endian
|
||||
* I!> i!> | |
|
||||
* Q> q> Q!> q!> | | "S>" is the same as "n"
|
||||
* J> j> J!> j!> | | "L>" is the same as "N"
|
||||
* | |
|
||||
* S< s< S!< s!< | Integer | same as the directives without "<" except
|
||||
* L< l< L!< l!< | | little endian
|
||||
* I!< i!< | |
|
||||
* Q< q< Q!< q!< | | "S<" is the same as "v"
|
||||
* J< j< J!< j!< | | "L<" is the same as "V"
|
||||
* | |
|
||||
* n | Integer | 16-bit unsigned, network (big-endian) byte order
|
||||
* N | Integer | 32-bit unsigned, network (big-endian) byte order
|
||||
* v | Integer | 16-bit unsigned, VAX (little-endian) byte order
|
||||
* V | Integer | 32-bit unsigned, VAX (little-endian) byte order
|
||||
* | |
|
||||
* U | Integer | UTF-8 character
|
||||
* w | Integer | BER-compressed integer (see Array#pack)
|
||||
*
|
||||
* Float | |
|
||||
* Directive | Returns | Meaning
|
||||
* -----------------------------------------------------------------
|
||||
* D d | Float | double-precision, native format
|
||||
* F f | Float | single-precision, native format
|
||||
* E | Float | double-precision, little-endian byte order
|
||||
* e | Float | single-precision, little-endian byte order
|
||||
* G | Float | double-precision, network (big-endian) byte order
|
||||
* g | Float | single-precision, network (big-endian) byte order
|
||||
*
|
||||
* String | |
|
||||
* Directive | Returns | Meaning
|
||||
* -----------------------------------------------------------------
|
||||
* A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
|
||||
* a | String | arbitrary binary string
|
||||
* Z | String | null-terminated string
|
||||
* B | String | bit string (MSB first)
|
||||
* b | String | bit string (LSB first)
|
||||
* H | String | hex string (high nibble first)
|
||||
* h | String | hex string (low nibble first)
|
||||
* u | String | UU-encoded string
|
||||
* M | String | quoted-printable, MIME encoding (see RFC2045)
|
||||
* m | String | base64 encoded string (RFC 2045) (default)
|
||||
* | | base64 encoded string (RFC 4648) if followed by 0
|
||||
* P | String | pointer to a structure (fixed-length string)
|
||||
* p | String | pointer to a null-terminated string
|
||||
*
|
||||
* Misc. | |
|
||||
* Directive | Returns | Meaning
|
||||
* -----------------------------------------------------------------
|
||||
* @ | --- | skip to the offset given by the length argument
|
||||
* X | --- | skip backward one byte
|
||||
* x | --- | skip forward one byte
|
||||
*
|
||||
* HISTORY
|
||||
*
|
||||
* * J, J!, j, and j! are available since Ruby 2.3.
|
||||
* * Q_, Q!, q_, and q! are available since Ruby 2.1.
|
||||
* * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
|
||||
*/
|
||||
|
||||
static VALUE
|
||||
pack_unpack(VALUE str, VALUE fmt)
|
||||
{
|
||||
int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
|
||||
return pack_unpack_internal(str, fmt, mode);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* str.unpack1(format) -> obj
|
||||
*
|
||||
* Decodes <i>str</i> (which may contain binary data) according to the
|
||||
* format string, returning the first value extracted.
|
||||
* See also <code>String#unpack</code>, <code>Array#pack</code>.
|
||||
*/
|
||||
|
||||
static VALUE
|
||||
pack_unpack1(VALUE str, VALUE fmt)
|
||||
{
|
||||
return pack_unpack_internal(str, fmt, UNPACK_1);
|
||||
}
|
||||
|
||||
int
|
||||
rb_uv_to_utf8(char buf[6], unsigned long uv)
|
||||
{
|
||||
|
@ -1980,6 +2009,7 @@ Init_pack(void)
|
|||
{
|
||||
rb_define_method(rb_cArray, "pack", pack_pack, -1);
|
||||
rb_define_method(rb_cString, "unpack", pack_unpack, 1);
|
||||
rb_define_method(rb_cString, "unpack1", pack_unpack1, 1);
|
||||
|
||||
id_associated = rb_make_internal_id();
|
||||
}
|
||||
|
|
|
@ -837,4 +837,11 @@ EXPECTED
|
|||
|
||||
assert_equal addr, [buf].pack('p')
|
||||
end
|
||||
|
||||
def test_unpack1
|
||||
assert_equal 65, "A".unpack1("C")
|
||||
assert_equal 68, "ABCD".unpack1("x3C")
|
||||
assert_equal 0x3042, "\u{3042 3044 3046}".unpack1("U*")
|
||||
assert_equal "hogefuga", "aG9nZWZ1Z2E=".unpack1("m")
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Reference in a new issue