From 5825359dd87a26d5daf7a604583baa0ab48cc543 Mon Sep 17 00:00:00 2001 From: naruse Date: Thu, 14 Oct 2010 13:12:56 +0000 Subject: [PATCH] * pack.c (pack_pack): support endian modifiers: < and >. [ruby-dev:42376] Feature #3491 * pack.c (pack_unpack): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29496 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 7 +++ NEWS | 6 ++ pack.c | 101 ++++++++++++++++++++++-------- test/ruby/test_pack.rb | 135 ++++++++++++++++++++++------------------- 4 files changed, 163 insertions(+), 86 deletions(-) diff --git a/ChangeLog b/ChangeLog index cabcf5ffd1..00ff14a6ed 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Thu Oct 14 20:41:27 2010 NARUSE, Yui + + * pack.c (pack_pack): support endian modifiers: < and >. + [ruby-dev:42376] Feature #3491 + + * pack.c (pack_unpack): ditto. + Thu Oct 14 20:50:51 2010 Masaki Suketa * ext/win32ole/win32ole.c (reg_get_val): expand environment in diff --git a/NEWS b/NEWS index 2d6e6df552..53f88ea001 100644 --- a/NEWS +++ b/NEWS @@ -31,6 +31,8 @@ with all sufficient information, see the ChangeLog file. * new constants: * File::NULL name of NULL device. + * extended methods: + * String#unpack supports endian modifiers * String * new methods: @@ -44,6 +46,10 @@ with all sufficient information, see the ChangeLog file. * extended methods: * IO#putc supports multibyte characters + * Array + * extended methods: + * Array#pack supports endian modifiers + * io/console * new methods: * IO#noecho {|io| } diff --git a/pack.c b/pack.c index e24041e224..d36eb29bd5 100644 --- a/pack.c +++ b/pack.c @@ -330,14 +330,6 @@ static unsigned long utf8_to_uv(const char*,long*); * l | Integer | 32-bit signed, native endian (int32_t) * q | Integer | 64-bit signed, native endian (int64_t) * | | - * S_, S! | Integer | unsigned short, native endian - * I, I_, I! | Integer | unsigned int, native endian - * L_, L! | Integer | unsigned long, native endian - * | | - * s_, s! 
| Integer | signed short, native endian - * i, i_, i! | Integer | signed int, native endian - * l_, l! | Integer | signed long, native endian - * | | * n | Integer | 16-bit unsigned, network (big-endian) byte order * N | Integer | 32-bit unsigned, network (big-endian) byte order * v | Integer | 16-bit unsigned, VAX (little-endian) byte order @@ -379,6 +371,14 @@ static unsigned long utf8_to_uv(const char*,long*); * @ | --- | moves to absolute position * X | --- | back up a byte * x | --- | null byte + * + * | Target | + * Modifier | Directive | Meaning + * --------------------------------------------------------------------------- + * _, ! | sSiIlL | Force native size of the related type: + * | | short, int, and long + * > | sSiIlLqQ | Force big-endian byte order + * < | sSiIlLqQ | Force little-endian byte order */ static VALUE @@ -396,6 +396,7 @@ pack_pack(VALUE ary, VALUE fmt) int natint; /* native integer */ #endif int signed_p, integer_size, bigendian_p; + int explicit_endian = 0; StringValue(fmt); p = RSTRING_PTR(fmt); @@ -425,19 +426,39 @@ pack_pack(VALUE ary, VALUE fmt) } continue; } - if (*p == '_' || *p == '!') { - static const char natstr[] = "sSiIlL"; - if (strchr(natstr, type)) { + { + static const char natstr[] = "sSiIlL"; + static const char endstr[] = "sSiIlLqQ"; + explicit_endian = 0; /* '<' and '>' apply to the current directive only; clear the previous directive's choice */ + modifiers: + switch (*p) { + case '_': + case '!': + if (strchr(natstr, type)) { #ifdef NATINT_PACK - natint = 1; + natint = 1; #endif - p++; - } - else { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + goto modifiers; + + case '<': + case '>': + if (!strchr(endstr, type)) { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); + } + if (explicit_endian) { + rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); + } + explicit_endian = *p++; + goto modifiers; } } + if (*p == '*') { /* set data length */ len = strchr("@Xxu", type) ? 
0 : strchr("PMm", type) ? 1 @@ -716,6 +737,10 @@ pack_pack(VALUE ary, VALUE fmt) goto pack_integer; pack_integer: + if (explicit_endian) { + bigendian_p = ((explicit_endian - '<') != 0); + } + switch (integer_size) { #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK) case SIZEOF_INT16_T: @@ -1309,6 +1334,7 @@ pack_unpack(VALUE str, VALUE fmt) #endif int block_p = rb_block_given_p(); int signed_p, integer_size, bigendian_p; + int explicit_endian = 0; #define UNPACK_PUSH(item) do {\ VALUE item_val = (item);\ if (block_p) {\ @@ -1340,20 +1366,41 @@ pack_unpack(VALUE str, VALUE fmt) } continue; } - star = 0; - if (*p == '_' || *p == '!') { - static const char natstr[] = "sSiIlL"; - if (strchr(natstr, type)) { + star = 0; + { + static const char natstr[] = "sSiIlL"; + static const char endstr[] = "sSiIlLqQ"; + explicit_endian = 0; /* '<' and '>' apply to the current directive only; clear the previous directive's choice */ + modifiers: + switch (*p) { + case '_': + case '!': + + if (strchr(natstr, type)) { #ifdef NATINT_PACK - natint = 1; + natint = 1; #endif - p++; - } - else { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + goto modifiers; + + case '<': + case '>': + if (!strchr(endstr, type)) { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); + } + if (explicit_endian) { + rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); + } + explicit_endian = *p++; + goto modifiers; } } + if (p >= pend) len = 1; else if (*p == '*') { @@ -1586,6 +1633,10 @@ pack_unpack(VALUE str, VALUE fmt) goto unpack_integer; unpack_integer: + if (explicit_endian) { + bigendian_p = ((explicit_endian - '<') != 0); + } + switch (integer_size) { #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK) case SIZEOF_INT16_T: diff --git a/test/ruby/test_pack.rb b/test/ruby/test_pack.rb index a4a6308299..6f746f2596 100644 --- a/test/ruby/test_pack.rb +++ b/test/ruby/test_pack.rb @@ -70,75 +70,88 @@ class TestPack < Test::Unit::TestCase assert_equal [1,1,1], 
"\000\000\000\001\000\000\000\001\000\000\000\001".unpack('N*') end + def _integer_big_endian(mod='') + assert_equal("\x01\x02", [0x0102].pack("s"+mod)) + assert_equal("\x01\x02", [0x0102].pack("S"+mod)) + assert_equal("\x01\x02\x03\x04", [0x01020304].pack("l"+mod)) + assert_equal("\x01\x02\x03\x04", [0x01020304].pack("L"+mod)) + assert_equal("\x01\x02\x03\x04\x05\x06\x07\x08", [0x0102030405060708].pack("q"+mod)) + assert_equal("\x01\x02\x03\x04\x05\x06\x07\x08", [0x0102030405060708].pack("Q"+mod)) + assert_match(/\A\x00*\x01\x02\z/, [0x0102].pack("s!"+mod)) + assert_match(/\A\x00*\x01\x02\z/, [0x0102].pack("S!"+mod)) + assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("i"+mod)) + assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("I"+mod)) + assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("i!"+mod)) + assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("I!"+mod)) + assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("l!"+mod)) + assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("L!"+mod)) + %w[s S l L q Q s! S! i I i! I! l! 
L!].each {|fmt| + fmt += mod + nuls = [0].pack(fmt) + v = 0 + s = "".force_encoding("ascii-8bit") + nuls.bytesize.times {|i| + j = i + 40 + v = v * 256 + j + s << [j].pack("C") + } + assert_equal(s, [v].pack(fmt), "[#{v}].pack(#{fmt.dump})") + assert_equal([v], s.unpack(fmt), "#{s.dump}.unpack(#{fmt.dump})") + s2 = s+s + fmt2 = fmt+"*" + assert_equal([v,v], s2.unpack(fmt2), "#{s2.dump}.unpack(#{fmt2.dump})") + } + end + + def _integer_little_endian(mod='') + assert_equal("\x02\x01", [0x0102].pack("s"+mod)) + assert_equal("\x02\x01", [0x0102].pack("S"+mod)) + assert_equal("\x04\x03\x02\x01", [0x01020304].pack("l"+mod)) + assert_equal("\x04\x03\x02\x01", [0x01020304].pack("L"+mod)) + assert_equal("\x08\x07\x06\x05\x04\x03\x02\x01", [0x0102030405060708].pack("q"+mod)) + assert_equal("\x08\x07\x06\x05\x04\x03\x02\x01", [0x0102030405060708].pack("Q"+mod)) + assert_match(/\A\x02\x01\x00*\z/, [0x0102].pack("s!"+mod)) + assert_match(/\A\x02\x01\x00*\z/, [0x0102].pack("S!"+mod)) + assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("i"+mod)) + assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("I"+mod)) + assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("i!"+mod)) + assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("I!"+mod)) + assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("l!"+mod)) + assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("L!"+mod)) + %w[s S l L q Q s! S! i I i! I! l! L!].each {|fmt| + fmt += mod + nuls = [0].pack(fmt) + v = 0 + s = "".force_encoding("ascii-8bit") + nuls.bytesize.times {|i| + j = i+40 + v = v * 256 + j + s << [j].pack("C") + } + s.reverse! 
+ assert_equal(s, [v].pack(fmt), "[#{v}].pack(#{fmt.dump})") + assert_equal([v], s.unpack(fmt), "#{s.dump}.unpack(#{fmt.dump})") + s2 = s+s + fmt2 = fmt+"*" + assert_equal([v,v], s2.unpack(fmt2), "#{s2.dump}.unpack(#{fmt2.dump})") + } + end + def test_integer_endian s = [1].pack("s") assert_includes(["\0\1", "\1\0"], s) if s == "\0\1" - # big endian - assert_equal("\x01\x02", [0x0102].pack("s")) - assert_equal("\x01\x02", [0x0102].pack("S")) - assert_equal("\x01\x02\x03\x04", [0x01020304].pack("l")) - assert_equal("\x01\x02\x03\x04", [0x01020304].pack("L")) - assert_equal("\x01\x02\x03\x04\x05\x06\x07\x08", [0x0102030405060708].pack("q")) - assert_equal("\x01\x02\x03\x04\x05\x06\x07\x08", [0x0102030405060708].pack("Q")) - assert_match(/\A\x00*\x01\x02\z/, [0x0102].pack("s!")) - assert_match(/\A\x00*\x01\x02\z/, [0x0102].pack("S!")) - assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("i")) - assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("I")) - assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("i!")) - assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("I!")) - assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("l!")) - assert_match(/\A\x00*\x01\x02\x03\x04\z/, [0x01020304].pack("L!")) - %w[s S l L q Q s! S! i I i! I! l! 
L!].each {|fmt| - nuls = [0].pack(fmt) - v = 0 - s = "".force_encoding("ascii-8bit") - nuls.bytesize.times {|i| - j = i + 40 - v = v * 256 + j - s << [j].pack("C") - } - assert_equal(s, [v].pack(fmt), "[#{v}].pack(#{fmt.dump})") - assert_equal([v], s.unpack(fmt), "#{s.dump}.unpack(#{fmt.dump})") - s2 = s+s - fmt2 = fmt+"*" - assert_equal([v,v], s2.unpack(fmt2), "#{s2.dump}.unpack(#{fmt2.dump})") - } + _integer_big_endian() else - # little endian - assert_equal("\x02\x01", [0x0102].pack("s")) - assert_equal("\x02\x01", [0x0102].pack("S")) - assert_equal("\x04\x03\x02\x01", [0x01020304].pack("l")) - assert_equal("\x04\x03\x02\x01", [0x01020304].pack("L")) - assert_equal("\x08\x07\x06\x05\x04\x03\x02\x01", [0x0102030405060708].pack("q")) - assert_equal("\x08\x07\x06\x05\x04\x03\x02\x01", [0x0102030405060708].pack("Q")) - assert_match(/\A\x02\x01\x00*\z/, [0x0102].pack("s!")) - assert_match(/\A\x02\x01\x00*\z/, [0x0102].pack("S!")) - assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("i")) - assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("I")) - assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("i!")) - assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("I!")) - assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("l!")) - assert_match(/\A\x04\x03\x02\x01\x00*\z/, [0x01020304].pack("L!")) - %w[s S l L q Q s! S! i I i! I! l! L!].each {|fmt| - nuls = [0].pack(fmt) - v = 0 - s = "".force_encoding("ascii-8bit") - nuls.bytesize.times {|i| - j = i+40 - v = v * 256 + j - s << [j].pack("C") - } - s.reverse! 
- assert_equal(s, [v].pack(fmt), "[#{v}].pack(#{fmt.dump})") - assert_equal([v], s.unpack(fmt), "#{s.dump}.unpack(#{fmt.dump})") - s2 = s+s - fmt2 = fmt+"*" - assert_equal([v,v], s2.unpack(fmt2), "#{s2.dump}.unpack(#{fmt2.dump})") - } + _integer_little_endian() end end + def test_integer_endian_explicit + _integer_big_endian('>') + _integer_little_endian('<') + end + def test_pack_U assert_raise(RangeError) { [-0x40000001].pack("U") } assert_raise(RangeError) { [-0x40000000].pack("U") }