From c82aee31b41f0772dd0491c6bfa5cb90b47d0d5a Mon Sep 17 00:00:00 2001 From: akr Date: Thu, 14 Aug 2008 14:28:10 +0000 Subject: [PATCH] * include/ruby/encoding.h (rb_econv_result_t): moved from transcode_data.h. (rb_econv_elem_t): ditto. (rb_econv_t): ditto. source_encoding and destination_encoding field is added. (rb_econv_open): declared. (rb_econv_convert): ditto. (rb_econv_close): ditto. * transcode.c (rb_econv_open_by_transcoder_entries): initialize source_encoding and destination_encoding field as NULL. (rb_econv_open): make it external linkage. (rb_econv_close): ditto. (rb_econv_convert): ditto. renamed from rb_econv_conv. (make_encoding): new function. (econv_init): use make_encoding and store rb_encoding* in rb_econv_t. (econv_source_encoding): new method Encoding::Converter#source_encoding. (econv_destination_encoding): new method Encoding::Converter#destination_encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18625 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 24 +++++++++++ include/ruby/encoding.h | 41 ++++++++++++++++++ test/ruby/test_econv.rb | 6 +++ transcode.c | 95 +++++++++++++++++++++++++++-------------- transcode_data.h | 27 ------------ 5 files changed, 135 insertions(+), 58 deletions(-) diff --git a/ChangeLog b/ChangeLog index ae37d32a4f..70e87471ce 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +Thu Aug 14 23:22:24 2008 Tanaka Akira + + * include/ruby/encoding.h (rb_econv_result_t): moved from + transcode_data.h. + (rb_econv_elem_t): ditto. + (rb_econv_t): ditto. source_encoding and destination_encoding field + is added. + (rb_econv_open): declared. + (rb_econv_convert): ditto. + (rb_econv_close): ditto. + + * transcode.c (rb_econv_open_by_transcoder_entries): initialize + source_encoding and destination_encoding field as NULL. + (rb_econv_open): make it external linkage. + (rb_econv_close): ditto. + (rb_econv_convert): ditto. renamed from rb_econv_conv. + (make_encoding): new function. + (econv_init): use make_encoding and store rb_encoding* in + rb_econv_t. + (econv_source_encoding): new method + Encoding::Converter#source_encoding. + (econv_destination_encoding): new method + Encoding::Converter#destination_encoding. + Thu Aug 14 22:44:32 2008 Tanaka Akira * transcode_data.h (rb_econv_result_t): change enumeration diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index c301b84580..90828f6c2d 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -196,4 +196,45 @@ rb_enc_dummy_p(rb_encoding *enc) VALUE rb_str_transcode(VALUE str, VALUE to); +/* econv stuff */ + +typedef enum { + econv_invalid_byte_sequence, + econv_undefined_conversion, + econv_destination_buffer_full, + econv_source_buffer_empty, + econv_finished, + econv_output_followed_by_input, +} rb_econv_result_t; + +typedef struct { + const char *from; + const char *to; + struct rb_transcoding *tc; + unsigned char *out_buf_start; + unsigned char *out_data_start; + unsigned char *out_data_end; + unsigned char *out_buf_end; + rb_econv_result_t last_result; +} rb_econv_elem_t; + +typedef struct { + rb_econv_elem_t *elems; + int num_trans; + int num_finished; + struct rb_transcoding *last_tc; + + /* The following fields are only for Encoding::Converter. + * rb_econv_open set them NULL. */ + rb_encoding *source_encoding; + rb_encoding *destination_encoding; +} rb_econv_t; + +rb_econv_t *rb_econv_open(const char *from, const char *to, int flags); +rb_econv_result_t rb_econv_convert(rb_econv_t *ec, + const unsigned char **input_ptr, const unsigned char *input_stop, + unsigned char **output_ptr, unsigned char *output_stop, + int flags); +void rb_econv_close(rb_econv_t *ec); + #endif /* RUBY_ENCODING_H */ diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb index 239489fdd9..98ed310504 100644 --- a/test/ruby/test_econv.rb +++ b/test/ruby/test_econv.rb @@ -25,6 +25,12 @@ class TestEncodingConverter < Test::Unit::TestCase assert_kind_of(Encoding::Converter, Encoding::Converter.new(Encoding::UTF_8, Encoding::EUC_JP)) end + def test_get_encoding + ec = Encoding::Converter.new("UTF-8", "EUC-JP") + assert_equal(Encoding::UTF_8, ec.source_encoding) + assert_equal(Encoding::EUC_JP, ec.destination_encoding) + end + def test_output_region ec = Encoding::Converter.new("UTF-8", "EUC-JP") ec.primitive_convert(src="a", dst="b", nil, 1, Encoding::Converter::PARTIAL_INPUT) diff --git a/transcode.c b/transcode.c index c81b7e755f..c7a5c1b0d3 100644 --- a/transcode.c +++ b/transcode.c @@ -678,6 +678,8 @@ rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries) ts->elems = ALLOC_N(rb_econv_elem_t, ts->num_trans); ts->num_finished = 0; ts->last_tc = NULL; + ts->source_encoding = NULL; + ts->destination_encoding = NULL; for (i = 0; i < ts->num_trans; i++) { const rb_transcoder *tr = load_transcoder_entry(entries[i]); ts->elems[i].from = tr->from_encoding; @@ -720,7 +722,7 @@ trans_open_i(const char *from, const char *to, int depth, void *arg) entries[depth] = get_transcoder_entry(from, to); } -static rb_econv_t * +rb_econv_t * rb_econv_open(const char *from, const char *to, int flags) { transcoder_entry_t **entries = NULL; @@ -921,8 +923,8 @@ found_needreport: return econv_source_buffer_empty; } -static rb_econv_result_t -rb_econv_conv(rb_econv_t *ts, +rb_econv_result_t +rb_econv_convert(rb_econv_t *ts, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags) @@ -940,7 +942,7 @@ rb_econv_conv(rb_econv_t *ts, return res; } -static void +void rb_econv_close(rb_econv_t *ts) { int i; @@ -1049,7 +1051,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, max_output = last_tc->transcoder->max_output; resume: - ret = rb_econv_conv(ts, in_pos, in_stop, out_pos, out_stop, opt); + ret = rb_econv_convert(ts, in_pos, in_stop, out_pos, out_stop, opt); if (ret == econv_invalid_byte_sequence) { /* deal with invalid byte sequence */ /* todo: add more alternative behaviors */ @@ -1119,14 +1121,14 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, if (ret == econv_source_buffer_empty) { if (ptr < in_stop) { input_byte = *ptr; - ret = rb_econv_conv(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT); + ret = rb_econv_convert(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT); } else { - ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, 0); + ret = rb_econv_convert(ts, NULL, NULL, out_pos, out_stop, 0); } } else { - ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT); + ret = rb_econv_convert(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT); } if (&input_byte != p) ptr += p - &input_byte; @@ -1381,6 +1383,20 @@ econv_s_allocate(VALUE klass) return Data_Wrap_Struct(klass, NULL, econv_free, NULL); } +static rb_encoding * +make_encoding(VALUE encoding) +{ + int idx = rb_to_encoding_index(encoding); + rb_encoding *enc; + if (0 <= idx) + enc = rb_enc_from_index(idx); + else { + idx = rb_define_dummy_encoding(StringValueCStr(encoding)); + enc = rb_enc_from_index(idx); + } + return enc; +} + /* * call-seq: * Encoding::Converter.new(source_encoding, destination_encoding) @@ -1414,7 +1430,6 @@ econv_init(int argc, VALUE *argv, VALUE self) { VALUE source_encoding, destination_encoding, flags_v; rb_encoding *senc, *denc; - const char *sname, *dname; rb_econv_t *ec; int flags; @@ -1425,35 +1440,21 @@ econv_init(int argc, VALUE *argv, VALUE self) else flags = NUM2INT(flags_v); - senc = NULL; - if (TYPE(source_encoding) != T_STRING) { - senc = rb_to_encoding(source_encoding); - } - - denc = NULL; - if (TYPE(destination_encoding) != T_STRING) { - denc = rb_to_encoding(destination_encoding); - } - - if (senc) - sname = senc->name; - else - sname = RSTRING_PTR(source_encoding); - - if (denc) - dname = denc->name; - else - dname = RSTRING_PTR(destination_encoding); + senc = make_encoding(source_encoding); + denc = make_encoding(destination_encoding); if (DATA_PTR(self)) { rb_raise(rb_eTypeError, "already initialized"); } - ec = rb_econv_open(sname, dname, flags); + ec = rb_econv_open(senc->name, denc->name, flags); if (!ec) { - rb_raise(rb_eArgError, "encoding convewrter not supported (from %s to %s)", sname, dname); + rb_raise(rb_eArgError, "encoding convewrter not supported (from %s to %s)", senc->name, denc->name); } + ec->source_encoding = senc; + ec->destination_encoding = denc; + DATA_PTR(self) = ec; return self; @@ -1489,6 +1490,36 @@ check_econv(VALUE self) return DATA_PTR(self); } +/* + * call-seq: + * source_encoding -> encoding + * + * returns source encoding as Encoding object. + */ +static VALUE +econv_source_encoding(VALUE self) +{ + rb_econv_t *ec = check_econv(self); + if (!ec->source_encoding) + return Qnil; + return rb_enc_from_encoding(ec->source_encoding); +} + +/* + * call-seq: + * destination_encoding -> encoding + * + * returns destination encoding as Encoding object. + */ +static VALUE +econv_destination_encoding(VALUE self) +{ + rb_econv_t *ec = check_econv(self); + if (!ec->destination_encoding) + return Qnil; + return rb_enc_from_encoding(ec->destination_encoding); +} + /* * call-seq: * primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol @@ -1612,7 +1643,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self) op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset; os = op + output_bytesize; - res = rb_econv_conv(ts, &ip, is, &op, os, flags); + res = rb_econv_convert(ts, &ip, is, &op, os, flags); rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output)); rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input)); @@ -1647,6 +1678,8 @@ Init_transcode(void) rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate); rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1); rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0); + rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0); + rb_define_method(rb_cEncodingConverter, "destination_encoding", econv_destination_encoding, 0); rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1); rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(PARTIAL_INPUT)); rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(OUTPUT_FOLLOWED_BY_INPUT)); diff --git a/transcode_data.h b/transcode_data.h index 855984e46f..b53a1813df 100644 --- a/transcode_data.h +++ b/transcode_data.h @@ -111,33 +111,6 @@ struct rb_transcoder { int (*finish_func)(rb_transcoding*, unsigned char*); /* -> output */ }; -typedef enum { - econv_invalid_byte_sequence, - econv_undefined_conversion, - econv_destination_buffer_full, - econv_source_buffer_empty, - econv_finished, - econv_output_followed_by_input, -} rb_econv_result_t; - -typedef struct { - const char *from; - const char *to; - rb_transcoding *tc; - unsigned char *out_buf_start; - unsigned char *out_data_start; - unsigned char *out_data_end; - unsigned char *out_buf_end; - rb_econv_result_t last_result; -} rb_econv_elem_t; - -typedef struct { - rb_econv_elem_t *elems; - int num_trans; - int num_finished; - rb_transcoding *last_tc; -} rb_econv_t; - void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib); void rb_register_transcoder(const rb_transcoder *);