From 2c1c462747346b05b00a0a18f28cf098526b0c1f Mon Sep 17 00:00:00 2001 From: mame Date: Thu, 25 Sep 2008 12:24:54 +0000 Subject: [PATCH] * pack.c (pack_pack, pack_unpack): 'm0' format (base64) complies with RFC 4648. It adds no line feed when encoding, and raises ArgumentError if the encoded string contains non-alphabet characters (including CR and LF). * lib/base64.rb: added. This provides encoding/decoding methods for Base64 in standard RFC 2045, Base64 in standard RFC 4648 and ``Base 64 Encoding with URL and Filename Safe Alphabet'' in RFC 4648. * test/ruby/test_pack.rb, test/base64/test_base64.rb: add tests for above. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19553 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 12 +++++ lib/base64.rb | 91 +++++++++++++++++++++++++++++++++++ pack.c | 97 ++++++++++++++++++++++++++----------- test/base64/test_base64.rb | 99 ++++++++++++++++++++++++++++++++++++++ test/ruby/test_pack.rb | 30 ++++++++++++ 5 files changed, 300 insertions(+), 29 deletions(-) create mode 100644 lib/base64.rb create mode 100644 test/base64/test_base64.rb diff --git a/ChangeLog b/ChangeLog index 0cf97e0138..dd594a2aa2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +Thu Sep 25 21:23:08 2008 Yusuke Endoh + + * pack.c (pack_pack, pack_unpack): 'm0' format (base64) complies with + RFC 4648. It adds no line feed when encoding, and raises ArgumentError + if the encoded string contains non-alphabet characters (including CR and LF). + + * lib/base64.rb: added. This provides encoding/decoding methods for + Base64 in standard RFC 2045, Base64 in standard RFC 4648 and ``Base 64 + Encoding with URL and Filename Safe Alphabet'' in RFC 4648. + + * test/ruby/test_pack.rb, test/base64/test_base64.rb: add tests for above. + Thu Sep 25 21:00:32 2008 Koichi Sasada * common.mk: fix btest-* rules [ruby-dev:36528]. 
diff --git a/lib/base64.rb b/lib/base64.rb new file mode 100644 index 0000000000..ebd796eccd --- /dev/null +++ b/lib/base64.rb @@ -0,0 +1,91 @@ +# +# = base64.rb: methods for base64-encoding and -decoding strings +# + +# The Base64 module provides for the encoding (#encode64, #strict_encode64, +# #urlsafe_encode64) and decoding (#decode64, #strict_decode64, +# #urlsafe_decode64) of binary data using a Base64 representation. +# +# == Example +# +# A simple encoding and decoding. +# +# require "base64" +# +# enc = Base64.encode64('Send reinforcements') +# # -> "U2VuZCByZWluZm9yY2VtZW50cw==\n" +# plain = Base64.decode64(enc) +# # -> "Send reinforcements" +# +# The purpose of using base64 to encode data is that it translates any +# binary data into purely printable characters. + +module Base64 + module_function + + # Returns the Base64-encoded version of +bin+. + # This method complies with RFC 2045. + # Line feeds are added to every 60 encoded characters. + # + # require 'base64' + # Base64.encode64("Now is the time for all good coders\nto learn Ruby") + # + # Generates: + # + # Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g + # UnVieQ== + def encode64(bin) + [bin].pack("m") + end + + # Returns the Base64-decoded version of +str+. + # This method complies with RFC 2045. + # Characters outside the base alphabet are ignored. + # + # require 'base64' + # str = 'VGhpcyBpcyBsaW5lIG9uZQpUaGlzIG' + + # 'lzIGxpbmUgdHdvClRoaXMgaXMgbGlu' + + # 'ZSB0aHJlZQpBbmQgc28gb24uLi4K' + # puts Base64.decode64(str) + # + # Generates: + # + # This is line one + # This is line two + # This is line three + # And so on... + def decode64(str) + str.unpack("m").first + end + + # Returns the Base64-encoded version of +bin+. + # This method complies with RFC 4648. + # No line feeds are added. + def strict_encode64(bin) + [bin].pack("m0") + end + + # Returns the Base64-decoded version of +str+. + # This method complies with RFC 4648. 
+ # ArgumentError is raised if +str+ is incorrectly padded or contains + # non-alphabet characters. Note that CR and LF are also rejected. + def strict_decode64(str) + str.unpack("m0").first + end + + # Returns the Base64-encoded version of +bin+. + # This method complies with ``Base 64 Encoding with URL and Filename Safe + # Alphabet'' in RFC 4648. + # The alphabet uses '-' instead of '+' and '_' instead of '/'. + def urlsafe_encode64(bin) + strict_encode64(bin).tr("+/", "-_") + end + + # Returns the Base64-decoded version of +str+. + # This method complies with ``Base 64 Encoding with URL and Filename Safe + # Alphabet'' in RFC 4648. + # The alphabet uses '-' instead of '+' and '_' instead of '/'. + def urlsafe_decode64(str) + strict_decode64(str.tr("-_", "+/")) + end +end diff --git a/pack.c b/pack.c index 625d3a0bcd..9641f6e407 100644 --- a/pack.c +++ b/pack.c @@ -362,7 +362,7 @@ num2i32(VALUE x) #endif static const char toofew[] = "too few arguments"; -static void encodes(VALUE,const char*,long,int); +static void encodes(VALUE,const char*,long,int,int); static void qpencode(VALUE,VALUE,long); static unsigned long utf8_to_uv(const char*,long*); @@ -414,7 +414,8 @@ static unsigned long utf8_to_uv(const char*,long*); * L | Unsigned long * l | Long * M | Quoted printable, MIME encoding (see RFC2045) - * m | Base64 encoded string + * m | Base64 encoded string (see RFC 2045, count is width) + * | (if count is 0, no line feeds are added, see RFC 4648) * N | Long, network (big-endian) byte order * n | Short, network (big-endian) byte-order * P | Pointer to a structure (fixed-length string) @@ -887,6 +888,11 @@ pack_pack(VALUE ary, VALUE fmt) ptr = RSTRING_PTR(from); plen = RSTRING_LEN(from); + if (len == 0) { + encodes(res, ptr, plen, type, 0); + ptr += plen; + break; + } if (len <= 2) len = 45; else @@ -898,7 +904,7 @@ pack_pack(VALUE ary, VALUE fmt) todo = len; else todo = plen; - encodes(res, ptr, todo, type); + encodes(res, ptr, todo, type, 1); plen -= todo; ptr += 
todo; } @@ -1007,7 +1013,7 @@ static const char b64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; static void -encodes(VALUE str, const char *s, long len, int type) +encodes(VALUE str, const char *s, long len, int type, int tail_lf) { char buff[4096]; long i = 0; @@ -1048,7 +1054,7 @@ encodes(VALUE str, const char *s, long len, int type) buff[i++] = padding; buff[i++] = padding; } - buff[i++] = '\n'; + if (tail_lf) buff[i++] = '\n'; rb_str_buf_cat(str, buff, i); } @@ -1242,7 +1248,8 @@ infected_str_new(const char *ptr, long len, VALUE str) * -------+---------+----------------------------------------- * M | String | quoted-printable * -------+---------+----------------------------------------- - * m | String | base64-encoded + * m | String | base64-encoded (RFC 2045) (default) + * | | base64-encoded (RFC 4648) if followed by 0 * -------+---------+----------------------------------------- * N | Integer | treat four characters as an unsigned * | | long in network byte order @@ -1793,7 +1800,7 @@ pack_unpack(VALUE str, VALUE fmt) { VALUE buf = infected_str_new(0, (send - s)*3/4, str); char *ptr = RSTRING_PTR(buf); - int a = -1,b = -1,c = 0,d; + int a = -1,b = -1,c = 0,d = 0; static signed char b64_xtable[256]; if (b64_xtable['/'] <= 0) { @@ -1806,30 +1813,62 @@ pack_unpack(VALUE str, VALUE fmt) b64_xtable[(unsigned char)b64_table[i]] = i; } } - while (s < send) { - a = b = c = d = -1; - while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} - if (s >= send) break; - s++; - while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} - if (s >= send) break; - s++; - while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} - if (*s == '=' || s >= send) break; - s++; - while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} - if (*s == '=' || s >= send) break; - s++; - *ptr++ = a << 2 | b >> 4; - *ptr++ = b << 4 | c >> 2; - *ptr++ = c << 6 | d; - } - 
if (a != -1 && b != -1) { - if (c == -1 && *s == '=') - *ptr++ = a << 2 | b >> 4; - else if (c != -1 && *s == '=') { + if (len == 0) { + while (s < send) { + a = b = c = d = -1; + a = b64_xtable[(unsigned char)*s++]; + if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64"); + b = b64_xtable[(unsigned char)*s++]; + if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64"); + if (*s == '=') { + if (s + 2 == send && *(s + 1) == '=') break; + rb_raise(rb_eArgError, "invalid base64"); + } + c = b64_xtable[(unsigned char)*s++]; + if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64"); + if (s + 1 == send && *s == '=') break; + d = b64_xtable[(unsigned char)*s++]; + if (d == -1) rb_raise(rb_eArgError, "invalid base64"); *ptr++ = a << 2 | b >> 4; *ptr++ = b << 4 | c >> 2; + *ptr++ = c << 6 | d; + } + if (c == -1) { + *ptr++ = a << 2 | b >> 4; + if (b & 0xf) rb_raise(rb_eArgError, "invalid base64"); + } + else if (d == -1) { + *ptr++ = a << 2 | b >> 4; + *ptr++ = b << 4 | c >> 2; + if (c & 0x3) rb_raise(rb_eArgError, "invalid base64"); + } + } + else { + while (s < send) { + a = b = c = d = -1; + while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} + if (s >= send) break; + s++; + while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} + if (s >= send) break; + s++; + while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} + if (*s == '=' || s >= send) break; + s++; + while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} + if (*s == '=' || s >= send) break; + s++; + *ptr++ = a << 2 | b >> 4; + *ptr++ = b << 4 | c >> 2; + *ptr++ = c << 6 | d; + } + if (a != -1 && b != -1) { + if (c == -1 && *s == '=') + *ptr++ = a << 2 | b >> 4; + else if (c != -1 && *s == '=') { + *ptr++ = a << 2 | b >> 4; + *ptr++ = b << 4 | c >> 2; + } } } rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); diff --git a/test/base64/test_base64.rb b/test/base64/test_base64.rb new 
file mode 100644 index 0000000000..9ae54cb405 --- /dev/null +++ b/test/base64/test_base64.rb @@ -0,0 +1,99 @@ +require "test/unit" +require "base64" + +class TestBase64 < Test::Unit::TestCase + def test_sample + assert_equal("U2VuZCByZWluZm9yY2VtZW50cw==\n", Base64.encode64('Send reinforcements')) + assert_equal('Send reinforcements', Base64.decode64("U2VuZCByZWluZm9yY2VtZW50cw==\n")) + assert_equal( + "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g\nUnVieQ==\n", + Base64.encode64("Now is the time for all good coders\nto learn Ruby")) + assert_equal( + "Now is the time for all good coders\nto learn Ruby", + Base64.decode64("Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g\nUnVieQ==\n")) + assert_equal( + "VGhpcyBpcyBsaW5lIG9uZQpUaGlzIGlzIGxpbmUgdHdvClRoaXMgaXMgbGlu\nZSB0aHJlZQpBbmQgc28gb24uLi4K\n", + Base64.encode64("This is line one\nThis is line two\nThis is line three\nAnd so on...\n")) + assert_equal( + "This is line one\nThis is line two\nThis is line three\nAnd so on...\n", + Base64.decode64("VGhpcyBpcyBsaW5lIG9uZQpUaGlzIGlzIGxpbmUgdHdvClRoaXMgaXMgbGluZSB0aHJlZQpBbmQgc28gb24uLi4K")) + end + + def test_encode64 + assert_equal("", Base64.encode64("")) + assert_equal("AA==\n", Base64.encode64("\0")) + assert_equal("AAA=\n", Base64.encode64("\0\0")) + assert_equal("AAAA\n", Base64.encode64("\0\0\0")) + assert_equal("/w==\n", Base64.encode64("\377")) + assert_equal("//8=\n", Base64.encode64("\377\377")) + assert_equal("////\n", Base64.encode64("\377\377\377")) + assert_equal("/+8=\n", Base64.encode64("\xff\xef")) + end + + def test_decode64 + assert_equal("", Base64.decode64("")) + assert_equal("\0", Base64.decode64("AA==\n")) + assert_equal("\0\0", Base64.decode64("AAA=\n")) + assert_equal("\0\0\0", Base64.decode64("AAAA\n")) + assert_equal("\377", Base64.decode64("/w==\n")) + assert_equal("\377\377", Base64.decode64("//8=\n")) + assert_equal("\377\377\377", Base64.decode64("////\n")) + assert_equal("\xff\xef", Base64.decode64("/+8=\n")) 
+ end + + def test_strict_encode64 + assert_equal("", Base64.strict_encode64("")) + assert_equal("AA==", Base64.strict_encode64("\0")) + assert_equal("AAA=", Base64.strict_encode64("\0\0")) + assert_equal("AAAA", Base64.strict_encode64("\0\0\0")) + assert_equal("/w==", Base64.strict_encode64("\377")) + assert_equal("//8=", Base64.strict_encode64("\377\377")) + assert_equal("////", Base64.strict_encode64("\377\377\377")) + assert_equal("/+8=", Base64.strict_encode64("\xff\xef")) + end + + def test_strict_decode64 + assert_equal("", Base64.strict_decode64("")) + assert_equal("\0", Base64.strict_decode64("AA==")) + assert_equal("\0\0", Base64.strict_decode64("AAA=")) + assert_equal("\0\0\0", Base64.strict_decode64("AAAA")) + assert_equal("\377", Base64.strict_decode64("/w==")) + assert_equal("\377\377", Base64.strict_decode64("//8=")) + assert_equal("\377\377\377", Base64.strict_decode64("////")) + assert_equal("\xff\xef", Base64.strict_decode64("/+8=")) + + assert_raise(ArgumentError) { Base64.strict_decode64("^") } + assert_raise(ArgumentError) { Base64.strict_decode64("A") } + assert_raise(ArgumentError) { Base64.strict_decode64("A^") } + assert_raise(ArgumentError) { Base64.strict_decode64("AA") } + assert_raise(ArgumentError) { Base64.strict_decode64("AA=") } + assert_raise(ArgumentError) { Base64.strict_decode64("AA===") } + assert_raise(ArgumentError) { Base64.strict_decode64("AA=x") } + assert_raise(ArgumentError) { Base64.strict_decode64("AAA") } + assert_raise(ArgumentError) { Base64.strict_decode64("AAA^") } + assert_raise(ArgumentError) { Base64.strict_decode64("AB==") } + assert_raise(ArgumentError) { Base64.strict_decode64("AAB=") } + end + + def test_urlsafe_encode64 + assert_equal("", Base64.urlsafe_encode64("")) + assert_equal("AA==", Base64.urlsafe_encode64("\0")) + assert_equal("AAA=", Base64.urlsafe_encode64("\0\0")) + assert_equal("AAAA", Base64.urlsafe_encode64("\0\0\0")) + assert_equal("_w==", Base64.urlsafe_encode64("\377")) + 
assert_equal("__8=", Base64.urlsafe_encode64("\377\377")) + assert_equal("____", Base64.urlsafe_encode64("\377\377\377")) + assert_equal("_-8=", Base64.urlsafe_encode64("\xff\xef")) + end + + def test_urlsafe_decode64 + assert_equal("", Base64.urlsafe_decode64("")) + assert_equal("\0", Base64.urlsafe_decode64("AA==")) + assert_equal("\0\0", Base64.urlsafe_decode64("AAA=")) + assert_equal("\0\0\0", Base64.urlsafe_decode64("AAAA")) + assert_equal("\377", Base64.urlsafe_decode64("_w==")) + assert_equal("\377\377", Base64.urlsafe_decode64("__8=")) + assert_equal("\377\377\377", Base64.urlsafe_decode64("____")) + assert_equal("\xff\xef", Base64.urlsafe_decode64("_+8=")) + end +end diff --git a/test/ruby/test_pack.rb b/test/ruby/test_pack.rb index fee992efbc..22b7b87580 100644 --- a/test/ruby/test_pack.rb +++ b/test/ruby/test_pack.rb @@ -379,6 +379,36 @@ class TestPack < Test::Unit::TestCase assert_equal(["\377\377\377"], "////\n".unpack("m")) end + def test_pack_unpack_m0 + assert_equal("", [""].pack("m0")) + assert_equal("AA==", ["\0"].pack("m0")) + assert_equal("AAA=", ["\0\0"].pack("m0")) + assert_equal("AAAA", ["\0\0\0"].pack("m0")) + assert_equal("/w==", ["\377"].pack("m0")) + assert_equal("//8=", ["\377\377"].pack("m0")) + assert_equal("////", ["\377\377\377"].pack("m0")) + + assert_equal([""], "".unpack("m0")) + assert_equal(["\0"], "AA==".unpack("m0")) + assert_equal(["\0\0"], "AAA=".unpack("m0")) + assert_equal(["\0\0\0"], "AAAA".unpack("m0")) + assert_equal(["\377"], "/w==".unpack("m0")) + assert_equal(["\377\377"], "//8=".unpack("m0")) + assert_equal(["\377\377\377"], "////".unpack("m0")) + + assert_raise(ArgumentError) { "^".unpack("m0") } + assert_raise(ArgumentError) { "A".unpack("m0") } + assert_raise(ArgumentError) { "A^".unpack("m0") } + assert_raise(ArgumentError) { "AA".unpack("m0") } + assert_raise(ArgumentError) { "AA=".unpack("m0") } + assert_raise(ArgumentError) { "AA===".unpack("m0") } + assert_raise(ArgumentError) { "AA=x".unpack("m0") } + 
assert_raise(ArgumentError) { "AAA".unpack("m0") } + assert_raise(ArgumentError) { "AAA^".unpack("m0") } + assert_raise(ArgumentError) { "AB==".unpack("m0") } + assert_raise(ArgumentError) { "AAB=".unpack("m0") } + end + def test_pack_unpack_M assert_equal("a b c\td =\n\ne=\n", ["a b c\td \ne"].pack("M")) assert_equal(["a b c\td \ne"], "a b c\td =\n\ne=\n".unpack("M"))