From bc63ec57e7f965796479f68f6f687187c089bd40 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 28 Oct 2020 10:58:28 -0700 Subject: [PATCH] [ruby/zlib] Allow Zlib.crc32 and .adler32 to accept IO instance This reads from the IO in 8192 byte chunks, so you don't need to have the entire string in memory. Fixes #16 https://github.com/ruby/zlib/commit/ba9793c550 --- ext/zlib/zlib.c | 19 ++++++++++++++++--- test/zlib/test_zlib.rb | 26 ++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/ext/zlib/zlib.c b/ext/zlib/zlib.c index c540d4c5a1..fa2112f6b8 100644 --- a/ext/zlib/zlib.c +++ b/ext/zlib/zlib.c @@ -56,7 +56,7 @@ max_uint(long n) #define MAX_UINT(n) (uInt)(n) #endif -static ID id_dictionaries; +static ID id_dictionaries, id_read; /*--------- Prototypes --------*/ @@ -407,6 +407,15 @@ do_checksum(int argc, VALUE *argv, uLong (*func)(uLong, const Bytef*, uInt)) if (NIL_P(str)) { sum = func(sum, Z_NULL, 0); } + else if (rb_obj_is_kind_of(str, rb_cIO)) { + VALUE buf; + VALUE buflen = INT2NUM(8192); + + while (!NIL_P(buf = rb_funcall(str, id_read, 1, buflen))) { + StringValue(buf); + sum = checksum_long(func, sum, (Bytef*)RSTRING_PTR(buf), RSTRING_LEN(buf)); + } + } else { StringValue(str); sum = checksum_long(func, sum, (Bytef*)RSTRING_PTR(str), RSTRING_LEN(str)); @@ -422,6 +431,8 @@ do_checksum(int argc, VALUE *argv, uLong (*func)(uLong, const Bytef*, uInt)) * Calculates Adler-32 checksum for +string+, and returns updated value of * +adler+. If +string+ is omitted, it returns the Adler-32 initial value. If * +adler+ is omitted, it assumes that the initial value is given to +adler+. + * If +string+ is an IO instance, reads from the IO until the IO returns nil + * and returns Adler-32 of all read data. * * Example usage: * @@ -466,7 +477,9 @@ rb_zlib_adler32_combine(VALUE klass, VALUE adler1, VALUE adler2, VALUE len2) * * Calculates CRC checksum for +string+, and returns updated value of +crc+. If * +string+ is omitted, it returns the CRC initial value. If +crc+ is omitted, it - * assumes that the initial value is given to +crc+. + * assumes that the initial value is given to +crc+. If +string+ is an IO instance, + * reads from the IO until the IO returns nil and returns CRC checksum of all read + * data. * * FIXME: expression. */ @@ -2198,7 +2211,7 @@ rb_inflate_set_dictionary(VALUE obj, VALUE dic) #define OS_CODE OS_UNIX #endif -static ID id_write, id_read, id_readpartial, id_flush, id_seek, id_close, id_path, id_input; +static ID id_write, id_readpartial, id_flush, id_seek, id_close, id_path, id_input; static VALUE cGzError, cNoFooter, cCRCError, cLengthError; diff --git a/test/zlib/test_zlib.rb b/test/zlib/test_zlib.rb index c58eafe112..c72fe76858 100644 --- a/test/zlib/test_zlib.rb +++ b/test/zlib/test_zlib.rb @@ -1145,6 +1145,19 @@ if defined? Zlib assert_equal(0x02820145, Zlib.adler32("foo")) assert_equal(0x02820145, Zlib.adler32("o", Zlib.adler32("fo"))) assert_equal(0x8a62c964, Zlib.adler32("abc\x01\x02\x03" * 10000)) + Tempfile.create("test_zlib_gzip_file_to_io") {|t| + File.binwrite(t.path, "foo") + t.rewind + assert_equal(0x02820145, Zlib.adler32(t)) + + t.rewind + crc = Zlib.adler32(t.read(2)) + assert_equal(0x02820145, Zlib.adler32(t, crc)) + + File.binwrite(t.path, "abc\x01\x02\x03" * 10000) + t.rewind + assert_equal(0x8a62c964, Zlib.adler32(t)) + } end def test_adler32_combine @@ -1167,6 +1180,19 @@ if defined? Zlib assert_equal(0x8c736521, Zlib.crc32("foo")) assert_equal(0x8c736521, Zlib.crc32("o", Zlib.crc32("fo"))) assert_equal(0x07f0d68f, Zlib.crc32("abc\x01\x02\x03" * 10000)) + Tempfile.create("test_zlib_gzip_file_to_io") {|t| + File.binwrite(t.path, "foo") + t.rewind + assert_equal(0x8c736521, Zlib.crc32(t)) + + t.rewind + crc = Zlib.crc32(t.read(2)) + assert_equal(0x8c736521, Zlib.crc32(t, crc)) + + File.binwrite(t.path, "abc\x01\x02\x03" * 10000) + t.rewind + assert_equal(0x07f0d68f, Zlib.crc32(t)) + } end def test_crc32_combine