From e193fa79d6a34e8afc74ce8be256d7dea743c0aa Mon Sep 17 00:00:00 2001 From: akr Date: Wed, 4 Feb 2009 13:01:12 +0000 Subject: [PATCH] * transcode.c (make_econv_exception): show U+XXXX form for undefined conversion error from UTF-8. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@22039 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 5 +++++ transcode.c | 18 ++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index aadef36457..81d3916d85 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Wed Feb 4 21:59:31 2009 Tanaka Akira + + * transcode.c (make_econv_exception): show U+XXXX form for undefined + conversion error from UTF-8. + Wed Feb 4 21:57:37 2009 Tanaka Akira * string.c (rb_str_dump): use MBCLEN_CHARFOUND_P properly. diff --git a/transcode.c b/transcode.c index 68fed6a1a6..0320590516 100644 --- a/transcode.c +++ b/transcode.c @@ -2009,9 +2009,23 @@ make_econv_exception(rb_econv_t *ec) if (ec->last_error.result == econv_undefined_conversion) { VALUE bytes = rb_str_new((const char *)ec->last_error.error_bytes_start, ec->last_error.error_bytes_len); - VALUE dumped; + VALUE dumped = Qnil; int idx; - dumped = rb_str_dump(bytes); + if (strcmp(ec->last_error.source_encoding, "UTF-8") == 0) { + rb_encoding *utf8 = rb_utf8_encoding(); + const char *start, *end; + int n; + start = (const char *)ec->last_error.error_bytes_start; + end = start + ec->last_error.error_bytes_len; + n = rb_enc_precise_mbclen(start, end, utf8); + if (MBCLEN_CHARFOUND_P(n) && + MBCLEN_CHARFOUND_LEN(n) == ec->last_error.error_bytes_len) { + unsigned int cc = rb_enc_codepoint(start, end, utf8); + dumped = rb_sprintf("U+%04X", cc); + } + } + if (dumped == Qnil) + dumped = rb_str_dump(bytes); mesg = rb_sprintf("%s from %s to %s", StringValueCStr(dumped), ec->last_error.source_encoding,