From 5a8db29b59f5f0bd6272bbfd615d356e475c52f1 Mon Sep 17 00:00:00 2001 From: matz Date: Thu, 23 Oct 2008 06:53:55 +0000 Subject: [PATCH] * string.c (rb_str_dump): use \u{ff} escape for UTF-8 encoding string. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19897 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 5 +++++ string.c | 42 +++++++++++++++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index cee9a1c332..e156eb49a3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,11 @@ Thu Oct 23 14:08:07 2008 Nobuyoshi Nakada converted before the proto so that the #to_int of the former cannot alter the latter. +Thu Oct 23 10:55:08 2008 Yukihiro Matsumoto + + * string.c (rb_str_dump): use \u{ff} escape for UTF-8 encoding + string. + Thu Oct 23 09:26:22 2008 NAKAMURA Usaku * ext/socket/socket.c (sock_s_getservbyport): cast to get rid of diff --git a/string.c b/string.c index fae5978325..a960970a07 100644 --- a/string.c +++ b/string.c @@ -4090,11 +4090,12 @@ rb_str_inspect(VALUE str) VALUE rb_str_dump(VALUE str) { - rb_encoding *enc0 = rb_enc_get(str); + rb_encoding *enc = rb_enc_get(str); long len; const char *p, *pend; char *q, *qend; VALUE result; + int u8 = (enc == rb_utf8_encoding()); len = 2; /* "" */ p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); @@ -4117,14 +4118,25 @@ rb_str_dump(VALUE str) len++; } else { - len += 4; /* \xNN */ + if (u8) { /* \u{NN} */ + char buf[32]; + int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1; + if (MBCLEN_CHARFOUND_P(n)) { + int cc = rb_enc_codepoint(p-1, pend, enc); + sprintf(buf, "%x", cc); + len += strlen(buf)+4; + p += n; + break; + } + } + len += 4; /* \xNN */ } break; } } - if (!rb_enc_asciicompat(enc0)) { + if (!rb_enc_asciicompat(enc)) { len += 19; /* ".force_encoding('')" */ - len += strlen(enc0->name); + len += strlen(enc->name); } result = rb_str_new5(str, 0, len); @@ -4180,19 +4192,31 @@ rb_str_dump(VALUE str) } else { *q++ = '\\'; + if (u8) { + int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1; + if (MBCLEN_CHARFOUND_P(n)) { + int cc = rb_enc_codepoint(p-1, pend, enc); + p += n; + sprintf(q, "u{%x}", cc); + q += strlen(q); + continue; + } + } sprintf(q, "x%02X", c); q += 3; } } *q++ = '"'; - if (!rb_enc_asciicompat(enc0)) { - sprintf(q, ".force_encoding(\"%s\")", enc0->name); - enc0 = rb_ascii8bit_encoding(); + *q = '\0'; + if (!rb_enc_asciicompat(enc)) { + sprintf(q, ".force_encoding(\"%s\")", enc->name); + enc = rb_ascii8bit_encoding(); } - +// STR_SET_LEN(result, strlen(RSTRING_PTR(result))); OBJ_INFECT(result, str); /* result from dump is ASCII */ - rb_enc_associate(result, enc0); + rb_enc_associate(result, enc); + ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT); return result; }