diff --git a/ChangeLog b/ChangeLog index 6bc7865be9..2782a9ac85 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Thu Sep 11 02:14:38 2008 Tanaka Akira + + * transcode.c (econv_convpath): new method. + Wed Sep 10 23:00:43 2008 Yusuke Endoh * tool/compile_prelude.rb: print "" instead of diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb index bd748440b9..1b7f4fbdef 100644 --- a/test/ruby/test_econv.rb +++ b/test/ruby/test_econv.rb @@ -807,4 +807,23 @@ class TestEncodingConverter < Test::Unit::TestCase assert_equal("?x".force_encoding("iso-2022-jp"), "\222\xA1x".encode("iso-2022-jp", "stateless-iso-2022-jp", :invalid => :replace)) end + + def test_convpath + assert_equal([], Encoding::Converter.new("", "").convpath) + assert_equal([["EUC-JP", "UTF-8"], ["UTF-8", "ISO-8859-1"]], + Encoding::Converter.new("EUC-JP", "ISO-8859-1").convpath) + assert_equal([["EUC-JP", "stateless-ISO-2022-JP"], ["stateless-ISO-2022-JP", "ISO-2022-JP"]], + Encoding::Converter.new("EUC-JP", "ISO-2022-JP").convpath) + assert_equal([["ISO-2022-JP", "stateless-ISO-2022-JP"], + ["stateless-ISO-2022-JP", "EUC-JP"], + ["EUC-JP", "UTF-8"], + ["UTF-8", "ISO-8859-1"]], + Encoding::Converter.new("ISO-2022-JP", "ISO-8859-1").convpath) + assert_equal(["universal_newline", ["UTF-8", "UTF-16BE"]], + Encoding::Converter.new("UTF-8", "UTF-16BE", universal_newline: true).convpath) + assert_equal([["UTF-16BE", "UTF-8"], "universal_newline"], + Encoding::Converter.new("UTF-16BE", "UTF-8", universal_newline: true).convpath) + assert_equal([["UTF-16BE", "UTF-8"], "universal_newline", ["UTF-8", "UTF-16LE"]], + Encoding::Converter.new("UTF-16BE", "UTF-16LE", universal_newline: true).convpath) + end end diff --git a/transcode.c b/transcode.c index d7ae502273..88d58097dd 100644 --- a/transcode.c +++ b/transcode.c @@ -2785,6 +2785,46 @@ econv_destination_encoding(VALUE self) return rb_enc_from_encoding(ec->destination_encoding); } +/* + * call-seq: + * ec.convpath -> ary + * + * Returns the 
conversion path of ec. + * + * The result is an array of conversions, in the order they are applied. + * + * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP", crlf_newline: true) + * p ec.convpath + * #=> [["ISO-8859-1", "UTF-8"], ["UTF-8", "EUC-JP"], "crlf_newline"] + * + * An element of the array is either a pair of strings or a single string. + * A pair [src, dst] means an encoding conversion from src to dst. + * A single string means a decorator. + * + * In the above example, ["ISO-8859-1", "UTF-8"] means a converter from + * ISO-8859-1 to UTF-8. + * "crlf_newline" means a newline converter from LF to CRLF. + */ +static VALUE +econv_convpath(VALUE self) +{ + rb_econv_t *ec = check_econv(self); + VALUE result; + int i; + + result = rb_ary_new(); + for (i = 0; i < ec->num_trans; i++) { + const rb_transcoder *tr = ec->elems[i].tc->transcoder; + VALUE v; + if (SUPPLEMENTAL_CONVERSION(tr->src_encoding, tr->dst_encoding)) + v = rb_str_new_cstr(tr->dst_encoding); + else + v = rb_assoc_new(rb_str_new_cstr(tr->src_encoding), rb_str_new_cstr(tr->dst_encoding)); + rb_ary_push(result, v); + } + return result; +} + static VALUE econv_result_to_symbol(rb_econv_result_t res) { @@ -3609,6 +3649,7 @@ Init_transcode(void) rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1); rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1); rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0); + rb_define_method(rb_cEncodingConverter, "convpath", econv_convpath, 0); rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0); rb_define_method(rb_cEncodingConverter, "destination_encoding", econv_destination_encoding, 0); rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1);