1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

merge revision(s) 28174:28178:

* re.c (rb_reg_expr_str): ASCII-incompatible strings
	  must always be escaped or converted.

	* re.c (rb_reg_expr_str): use rb_str_buf_cat_escaped_char
	  when resenc is given: for Regexp#inspect or error message.
	  * re.c (rb_reg_desc): add 'n' for ENCODING_NONE.

	* string.c (sym_inspect): Escape when the symbol is not in the
	  result encoding and is not ascii_only. It used to escape
	  ASCII-incompatible strings, but that was wrong.

	* string.c (rb_str_buf_cat_escaped_char): defined.
	  Split from rb_str_inspect.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_2@28180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2010-06-05 14:16:05 +00:00
parent e1c14eabd5
commit 09e9807a89
5 changed files with 109 additions and 45 deletions

View file

@ -1,3 +1,24 @@
Sat Jun 5 23:15:42 2010 NARUSE, Yui <naruse@ruby-lang.org>
* re.c (rb_reg_expr_str): ASCII-incompatible strings
must always be escaped or converted.
* re.c (rb_reg_expr_str): use rb_str_buf_cat_escaped_char
when resenc is given: for Regexp#inspect or error message.
* re.c (rb_reg_desc): add 'n' for ENCODING_NONE.
Sat Jun 5 23:15:42 2010 NARUSE, Yui <naruse@ruby-lang.org>
* string.c (sym_inspect): Escape when the symbol is not in the
result encoding and is not ascii_only. It used to escape
ASCII-incompatible strings, but that was wrong.
Sat Jun 5 23:15:42 2010 NARUSE, Yui <naruse@ruby-lang.org>
* string.c (rb_str_buf_cat_escaped_char): defined.
Split from rb_str_inspect.
Sat Jun 5 23:14:51 2010 NARUSE, Yui <naruse@ruby-lang.org>
* string.c (rb_str_inspect): inspect as ASCII when the codepoint

73
re.c
View file

@ -314,32 +314,47 @@ rb_reg_check(VALUE re)
}
}
int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p);
static void
rb_reg_expr_str(VALUE str, const char *s, long len)
rb_reg_expr_str(VALUE str, const char *s, long len,
rb_encoding *enc, rb_encoding *resenc)
{
rb_encoding *enc = rb_enc_get(str);
const char *p, *pend;
int need_escape = 0;
int c, clen;
p = s; pend = p + len;
while (p<pend) {
c = rb_enc_ascget(p, pend, &clen, enc);
if (c == -1) {
p += mbclen(p, pend, enc);
}
else if (c != '/' && rb_enc_isprint(c, enc)) {
p += clen;
}
else {
need_escape = 1;
break;
}
if (rb_enc_asciicompat(enc)) {
while (p < pend) {
c = rb_enc_ascget(p, pend, &clen, enc);
if (c == -1) {
if (enc == resenc) {
p += mbclen(p, pend, enc);
}
else {
need_escape = 1;
break;
}
}
else if (c != '/' && rb_enc_isprint(c, enc)) {
p += clen;
}
else {
need_escape = 1;
break;
}
}
}
else {
need_escape = 1;
}
if (!need_escape) {
rb_str_buf_cat(str, s, len);
}
else {
int unicode_p = rb_enc_unicode_p(enc);
p = s;
while (p<pend) {
c = rb_enc_ascget(p, pend, &clen, enc);
@ -355,8 +370,15 @@ rb_reg_expr_str(VALUE str, const char *s, long len)
rb_str_buf_cat(str, p, clen);
}
else if (c == -1) {
int l = mbclen(p, pend, enc);
rb_str_buf_cat(str, p, l);
int l;
if (resenc) {
unsigned int c = rb_enc_mbc_to_codepoint(p, pend, enc);
l = rb_str_buf_cat_escaped_char(str, c, unicode_p);
}
else {
l = mbclen(p, pend, enc);
rb_str_buf_cat(str, p, l);
}
p += l;
continue;
}
@ -380,20 +402,26 @@ rb_reg_expr_str(VALUE str, const char *s, long len)
static VALUE
rb_reg_desc(const char *s, long len, VALUE re)
{
rb_encoding *enc = rb_enc_get(re);
VALUE str = rb_str_buf_new2("/");
if (re && rb_enc_asciicompat(rb_enc_get(re))) {
rb_encoding *resenc = rb_default_internal_encoding();
if (resenc == NULL) resenc = rb_default_external_encoding();
if (re && rb_enc_asciicompat(enc)) {
rb_enc_copy(str, re);
}
else {
rb_enc_associate(str, rb_usascii_encoding());
}
rb_reg_expr_str(str, s, len);
rb_reg_expr_str(str, s, len, enc, resenc);
rb_str_buf_cat2(str, "/");
if (re) {
char opts[4];
rb_reg_check(re);
if (*option_to_str(opts, RREGEXP(re)->ptr->options))
rb_str_buf_cat2(str, opts);
if (RBASIC(re)->flags & REG_ENCODING_NONE)
rb_str_buf_cat2(str, "n");
}
OBJ_INFECT(str, re);
return str;
@ -476,6 +504,7 @@ rb_reg_to_s(VALUE re)
const UChar* ptr;
VALUE str = rb_str_buf_new2("(?");
char optbuf[5];
rb_encoding *enc = rb_enc_get(re);
rb_reg_check(re);
@ -524,7 +553,7 @@ rb_reg_to_s(VALUE re)
++ptr;
len -= 2;
err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT,
rb_enc_get(re), OnigDefaultSyntax, NULL);
enc, OnigDefaultSyntax, NULL);
onig_free(rp);
}
if (err) {
@ -543,7 +572,7 @@ rb_reg_to_s(VALUE re)
}
rb_str_buf_cat2(str, ":");
rb_reg_expr_str(str, (char*)ptr, len);
rb_reg_expr_str(str, (char*)ptr, len, enc, NULL);
rb_str_buf_cat2(str, ")");
rb_enc_copy(str, re);
@ -564,10 +593,12 @@ rb_enc_reg_error_desc(const char *s, long len, rb_encoding *enc, int options, co
{
char opts[6];
VALUE desc = rb_str_buf_new2(err);
rb_encoding *resenc = rb_default_internal_encoding();
if (resenc == NULL) resenc = rb_default_external_encoding();
rb_enc_associate(desc, enc);
rb_str_buf_cat2(desc, ": /");
rb_reg_expr_str(desc, s, len);
rb_reg_expr_str(desc, s, len, enc, resenc);
opts[0] = '/';
option_to_str(opts + 1, options);
rb_str_buf_cat2(desc, opts);

View file

@ -4078,6 +4078,36 @@ str_cat_char(VALUE str, unsigned int c, rb_encoding *enc)
}
#endif
#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
/*
 * Formats the codepoint +c+ as text and appends that text to +result+
 * with rb_str_buf_cat.
 *
 * When +unicode_p+ is non-zero:
 *   - c < 0x7F and ISPRINT(c):  the character itself ("%c")
 *   - c < 0x10000:              \uXXXX  (uppercase hex, zero-padded to 4)
 *   - otherwise:                \u{X...}
 * When +unicode_p+ is zero:
 *   - c < 0x100:                \xXX    (uppercase hex, zero-padded to 2)
 *   - otherwise:                \x{X...}
 *
 * Returns the length in bytes of the text that was appended.  Note that
 * this is the length of the generated escape, not the byte length of
 * whatever source character +c+ was decoded from.
 */
int
rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
{
    char escaped[CHAR_ESC_LEN + 1];
    int escaped_len;

    if (!unicode_p) {
        /* Non-Unicode encodings: always a \x escape, braced once the
         * value no longer fits in two hex digits. */
        if (c >= 0x100) {
            snprintf(escaped, CHAR_ESC_LEN, "\\x{%X}", c);
        }
        else {
            snprintf(escaped, CHAR_ESC_LEN, "\\x%02X", c);
        }
    }
    else if (c >= 0x10000) {
        /* More than four hex digits are needed: braced \u form. */
        snprintf(escaped, CHAR_ESC_LEN, "\\u{%X}", c);
    }
    else if (c < 0x7F && ISPRINT(c)) {
        /* Printable ASCII is emitted verbatim. */
        snprintf(escaped, CHAR_ESC_LEN, "%c", c);
    }
    else {
        snprintf(escaped, CHAR_ESC_LEN, "\\u%04X", c);
    }

    escaped_len = strlen(escaped);
    rb_str_buf_cat(result, escaped, escaped_len);
    return escaped_len;
}
/*
* call-seq:
* str.inspect -> string
@ -4095,7 +4125,6 @@ rb_str_inspect(VALUE str)
{
rb_encoding *enc = STR_ENC_GET(str);
const char *p, *pend, *prev;
#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
char buf[CHAR_ESC_LEN + 1];
VALUE result = rb_str_buf_new(0);
rb_encoding *resenc = rb_default_internal_encoding();
@ -4165,27 +4194,7 @@ rb_str_inspect(VALUE str)
}
else {
if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
if (unicode_p) {
if (c < 0x100 && ISPRINT(c)) {
snprintf(buf, CHAR_ESC_LEN, "%c", c);
}
else if (c < 0x10000) {
snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c);
}
else {
snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c);
}
str_buf_cat(result, buf, strlen(buf));
}
else {
if (c < 0x100) {
snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c);
}
else {
snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c);
}
str_buf_cat(result, buf, strlen(buf));
}
rb_str_buf_cat_escaped_char(result, c, unicode_p);
prev = p;
continue;
}
@ -7069,12 +7078,14 @@ sym_inspect(VALUE sym)
const char *ptr;
long len;
char *dest;
rb_encoding *resenc = rb_default_internal_encoding();
if (resenc == NULL) resenc = rb_default_external_encoding();
sym = rb_id2str(id);
enc = STR_ENC_GET(sym);
ptr = RSTRING_PTR(sym);
len = RSTRING_LEN(sym);
if (!rb_enc_asciicompat(enc) || len != (long)strlen(ptr) ||
if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (long)strlen(ptr) ||
!rb_enc_symname_p(ptr, enc) || !sym_printable(ptr, ptr + len, enc)) {
str = rb_str_inspect(sym);
len = RSTRING_LEN(str);

View file

@ -64,6 +64,7 @@ module EnvUtil
module_function :rubyexec
def invoke_ruby(args, stdin_data="", capture_stdout=false, capture_stderr=false, opt={})
args = [args] if args.kind_of?(String)
begin
in_c, in_p = IO.pipe
out_p, out_c = IO.pipe if capture_stdout

View file

@ -151,7 +151,7 @@ class TestRegexp < Test::Unit::TestCase
assert_equal('/\x00/i', /#{"\0"}/i.inspect)
assert_equal("/\n/i", /#{"\n"}/i.inspect)
s = [0xff].pack("C")
assert_equal('/\/'+s+'/i', /\/#{s}/i.inspect)
assert_equal('/\/\xFF/i', /\/#{s}/i.inspect)
end
def test_char_to_option