1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* transcode.c (transcode_loop): insert the fallback value into the
  output when the fallback hash has a matching key. [ruby-dev:40540]
  [ruby-dev:40829] #3036

* transcode.c (rb_econv_prepare_opts): pass to newhash
  a value with the key :fallback.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27326 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2010-04-13 01:26:46 +00:00
parent 9afaef347c
commit c871aee96b
3 changed files with 57 additions and 3 deletions

View file

@ -1,3 +1,12 @@
Tue Apr 13 09:32:12 2010 NARUSE, Yui <naruse@ruby-lang.org>
* transcode.c (transcode_loop): insert output the value when
fallback hash has a related key. [ruby-dev:40540]
[ruby-dev:40829] #3036
* transcode.c (rb_econv_prepare_opts): pass to newhash
a value with the key :fallback.
Tue Apr 13 00:12:04 2010 Tanaka Akira <akr@fsij.org>
* random.c (rand_init): use the absolute value of seed to

View file

@ -1892,8 +1892,7 @@ class TestTranscode < Test::Unit::TestCase
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xAF\xAB\xAA\x4C\xB8\x71\xB3\xD5", 'Big5-HKSCS') # 神林義博
end
def
test_Big5_UAO
def test_Big5_UAO
check_both_ways("\u4e17", "\x81\x40", 'Big5-UAO') # 丗
end
@ -1903,4 +1902,13 @@ class TestTranscode < Test::Unit::TestCase
assert_equal(Encoding::US_ASCII, a.encoding)
assert_equal(Encoding::Shift_JIS, b.encoding)
end
# Checks that String#encode substitutes the value found in the :fallback
# hash for a character that would otherwise fail to convert.
def test_fallback
  expected_euc = "\u3042".encode("EUC-JP")

  # Fallback value supplied already encoded in the destination encoding.
  replaced = "\u{20000}".encode("EUC-JP", fallback: {"\u{20000}" => expected_euc})
  assert_equal(expected_euc, replaced)

  # Fallback value supplied as a Unicode string; the result is expected to
  # come out in the destination encoding all the same.
  replaced = "\u{20000}".encode("EUC-JP", fallback: {"\u{20000}" => "\u3042"})
  assert_equal(expected_euc, replaced)

  # Plain ASCII fallback text for a character encoded to SJIS-KDDI.
  replaced = "\u{1F4BA}".encode("SJIS-KDDI", fallback: {"\u{1F4BA}" => "[ISU]"})
  assert_equal("[ISU]", replaced)
end
end

View file

@ -21,7 +21,7 @@ VALUE rb_eConverterNotFoundError;
VALUE rb_cEncodingConverter;
static VALUE sym_invalid, sym_undef, sym_replace;
static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback;
static VALUE sym_xml, sym_text, sym_attr;
static VALUE sym_universal_newline;
static VALUE sym_crlf_newline;
@ -2256,17 +2256,37 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
unsigned char *out_start = *out_pos;
int max_output;
VALUE exc;
VALUE fallback = Qnil;
ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
if (!ec)
rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
if (!NIL_P(ecopts) && TYPE(ecopts) == T_HASH)
fallback = rb_hash_aref(ecopts, sym_fallback);
last_tc = ec->last_tc;
max_output = last_tc ? last_tc->transcoder->max_output : 1;
resume:
ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0);
if (!NIL_P(fallback) && ret == econv_undefined_conversion) {
VALUE rep = rb_enc_str_new(
(const char *)ec->last_error.error_bytes_start,
ec->last_error.error_bytes_len,
rb_enc_find(ec->last_error.source_encoding));
rep = rb_hash_lookup2(fallback, rep, Qundef);
if (rep != Qundef) {
StringValue(rep);
ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(rep),
RSTRING_LEN(rep), rb_enc_name(rb_enc_get(rep)));
if (ret == -1) {
rb_raise(rb_eArgError, "too big fallback string");
}
goto resume;
}
}
if (ret == econv_invalid_byte_sequence ||
ret == econv_incomplete_input ||
ret == econv_undefined_conversion) {
@ -2442,6 +2462,7 @@ rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
return 0;
}
ecflags = econv_opts(opthash);
v = rb_hash_aref(opthash, sym_replace);
if (!NIL_P(v)) {
StringValue(v);
@ -2456,6 +2477,16 @@ rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
rb_hash_aset(newhash, sym_replace, v);
}
v = rb_hash_aref(opthash, sym_fallback);
if (!NIL_P(v)) {
v = rb_convert_type(v, T_HASH, "Hash", "to_hash");
if (!NIL_P(v)) {
if (NIL_P(newhash))
newhash = rb_hash_new();
rb_hash_aset(newhash, sym_fallback, v);
}
}
if (!NIL_P(newhash))
rb_hash_freeze(newhash);
*opts = newhash;
@ -2728,6 +2759,11 @@ str_encode_bang(int argc, VALUE *argv, VALUE str)
* :replace ::
* Sets the replacement string to the value. The default replacement
* string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
* :fallback ::
* Sets the replacement string by the given hash for undefined characters.
* Each key is an undefined character encoded in the source encoding
* of the current transcoder. Each value can be in any encoding as long as
* it can be converted into the destination encoding of the transcoder.
* :xml ::
* The value must be <code>:text</code> or <code>:attr</code>.
* If the value is <code>:text</code> <code>#encode</code> replaces
@ -4193,6 +4229,7 @@ Init_transcode(void)
sym_invalid = ID2SYM(rb_intern("invalid"));
sym_undef = ID2SYM(rb_intern("undef"));
sym_replace = ID2SYM(rb_intern("replace"));
sym_fallback = ID2SYM(rb_intern("fallback"));
sym_xml = ID2SYM(rb_intern("xml"));
sym_text = ID2SYM(rb_intern("text"));
sym_attr = ID2SYM(rb_intern("attr"));