From 42dcda08aef365e166b8784e0349a28896a871e6 Mon Sep 17 00:00:00 2001 From: naruse Date: Fri, 25 Jan 2008 16:40:02 +0000 Subject: [PATCH] * string.c (rb_usascii_str_new{,2}): defined. (rb_str_new): set US-ASCII and ENC_CODERANGE_7BIT when empty string. * encoding.c (rb_usascii_encoding, rb_usascii_encindex): defined. (rb_enc_inspect, enc_name, rb_locale_charmap, rb_enc_name_list_i): use rb_usascii_str_new. * array.c (recursive_join, inspect_ary): ditto. * object.c (nil_to_s, nil_inspect, true_to_s, false_to_s, rb_mod_to_s): ditto. * hash.c (inspect_hash, rb_hash_inspect, rb_f_getenv, env_fetch, env_clear, env_to_s, env_inspect): ditto. * numeric.c (flo_to_s, int_chr, rb_fix2str): ditto. * bignum.c (rb_big2str): ditto. * file.c (rb_file_ftype, rb_file_s_dirname, rb_file_s_extname, file_inspect_join, Init_file): ditto. * test/ruby/test_m17n.rb: add checks for encoding of string. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15244 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 27 ++++++++++++++++++++++ array.c | 4 ++-- bignum.c | 2 +- encoding.c | 22 ++++++++++++------ file.c | 12 +++++----- hash.c | 12 +++++----- numeric.c | 17 +++++++++----- object.c | 10 ++++----- string.c | 21 +++++++++++++++++ test/ruby/test_m17n.rb | 51 ++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 145 insertions(+), 33 deletions(-) diff --git a/ChangeLog b/ChangeLog index 911638a6e8..906c69b465 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,30 @@ +Sat Jan 26 00:17:18 2008 NARUSE, Yui + + * string.c (rb_usascii_str_new{,2}): defined. + (rb_str_new): set US-ASCII and ENC_CODERANGE_7BIT when empty + string. + + * encoding.c (rb_usascii_encoding, rb_usascii_encindex): defined. + (rb_enc_inspect, enc_name, rb_locale_charmap, rb_enc_name_list_i): + use rb_usascii_str_new. + + * array.c (recursive_join, inspect_ary): ditto. + + * object.c (nil_to_s, nil_inspect, true_to_s, false_to_s, + rb_mod_to_s): ditto. 
+ + * hash.c (inspect_hash, rb_hash_inspect, rb_f_getenv, env_fetch, + env_clear, env_to_s, env_inspect): ditto. + + * numeric.c (flo_to_s, int_chr, rb_fix2str): ditto. + + * bignum.c (rb_big2str): ditto. + + * file.c (rb_file_ftype, rb_file_s_dirname, rb_file_s_extname, + file_inspect_join, Init_file): ditto. + + * test/ruby/test_m17n.rb: add checks for encoding of string. + Sat Jan 26 01:35:46 2008 Tanaka Akira * marshal.c (r_byte): use getbyte instead of getc. diff --git a/array.c b/array.c index bd7fde55f6..097a998cfe 100644 --- a/array.c +++ b/array.c @@ -1233,7 +1233,7 @@ recursive_join(VALUE ary, VALUE argp, int recur) { VALUE *arg = (VALUE *)argp; if (recur) { - return rb_str_new2("[...]"); + return rb_usascii_str_new2("[...]"); } return rb_ary_join(arg[0], arg[1]); } @@ -1337,7 +1337,7 @@ inspect_ary(VALUE ary, VALUE dummy, int recur) static VALUE rb_ary_inspect(VALUE ary) { - if (RARRAY_LEN(ary) == 0) return rb_str_new2("[]"); + if (RARRAY_LEN(ary) == 0) return rb_usascii_str_new2("[]"); return rb_exec_recursive(inspect_ary, ary, 0); } diff --git a/bignum.c b/bignum.c index 5ed026acdf..669e422331 100644 --- a/bignum.c +++ b/bignum.c @@ -904,7 +904,7 @@ rb_big2str0(VALUE x, int base, int trim) return rb_fix2str(x, base); } if (BIGZEROP(x)) { - return rb_str_new2("0"); + return rb_usascii_str_new2("0"); } if (base < 2 || 36 < base) diff --git a/encoding.c b/encoding.c index bddae485b9..961c3c4f3d 100644 --- a/encoding.c +++ b/encoding.c @@ -838,9 +838,11 @@ rb_enc_tolower(int c, rb_encoding *enc) static VALUE enc_inspect(VALUE self) { - return rb_sprintf("#<%s:%s%s>", rb_obj_classname(self), + VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self), rb_enc_name((rb_encoding*)DATA_PTR(self)), (ENC_DUMMY_P(self) ? 
" (dummy)" : "")); + ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + return str; } /* @@ -854,7 +856,7 @@ enc_inspect(VALUE self) static VALUE enc_name(VALUE self) { - return rb_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self))); + return rb_usascii_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self))); } static VALUE @@ -993,6 +995,12 @@ rb_usascii_encoding(void) return enc_table.list[ENCINDEX_US_ASCII].enc; } +int +rb_usascii_encindex(void) +{ + return ENCINDEX_US_ASCII; +} + rb_encoding * rb_locale_encoding(void) { @@ -1066,11 +1074,11 @@ VALUE rb_locale_charmap(VALUE klass) { #if defined NO_LOCALE_CHARMAP - return rb_str_new2("ASCII-8BIT"); + return rb_usascii_str_new2("ASCII-8BIT"); #elif defined HAVE_LANGINFO_H char *codeset; codeset = nl_langinfo(CODESET); - return rb_str_new2(codeset); + return rb_usascii_str_new2(codeset); #elif defined _WIN32 return rb_sprintf("CP%d", GetACP()); #else @@ -1128,7 +1136,7 @@ static int rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg) { VALUE ary = (VALUE)arg; - VALUE str = rb_str_new2((char *)name); + VALUE str = rb_usascii_str_new2((char *)name); OBJ_FREEZE(str); rb_ary_push(ary, str); return ST_CONTINUE; @@ -1172,11 +1180,11 @@ rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg) if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) { return ST_CONTINUE; } - str = rb_str_new2(rb_enc_name(enc)); + str = rb_usascii_str_new2(rb_enc_name(enc)); OBJ_FREEZE(str); rb_ary_store(ary, idx, str); } - key = rb_str_new2((char *)name); + key = rb_usascii_str_new2((char *)name); OBJ_FREEZE(key); rb_hash_aset(aliases, key, str); return ST_CONTINUE; diff --git a/file.c b/file.c index 18caf6b8a8..4a41e0b444 100644 --- a/file.c +++ b/file.c @@ -1632,7 +1632,7 @@ rb_file_ftype(const struct stat *st) t = "unknown"; } - return rb_str_new2(t); + return rb_usascii_str_new2(t); } /* @@ -2917,7 +2917,7 @@ rb_file_s_dirname(VALUE klass, VALUE fname) p = root; } if (p == name) - return 
rb_str_new2("."); + return rb_usascii_str_new2("."); #ifdef DOSISH_DRIVE_LETTER if (has_drive_letter(name) && isdirsep(*(name + 2))) { const char *top = skiproot(name + 2); @@ -2965,7 +2965,7 @@ rb_file_s_extname(VALUE klass, VALUE fname) e = strrchr(p, '.'); /* get the last dot of the last component */ if (!e || e == p || !e[1]) /* no dot, or the only dot is first or end? */ - return rb_str_new2(""); + return rb_str_new(0, 0); extname = rb_str_new(e, chompdirsep(e) - e); /* keep the dot, too! */ OBJ_INFECT(extname, fname); return extname; @@ -3014,7 +3014,7 @@ static VALUE file_inspect_join(VALUE ary, VALUE argp, int recur) { VALUE *arg = (VALUE *)argp; - if (recur) return rb_str_new2("[...]"); + if (recur) return rb_usascii_str_new2("[...]"); return rb_file_join(arg[0], arg[1]); } @@ -4516,14 +4516,14 @@ Init_File(void) rb_define_singleton_method(rb_cFile, "extname", rb_file_s_extname, 1); rb_define_singleton_method(rb_cFile, "path", rb_file_s_path, 1); - separator = rb_obj_freeze(rb_str_new2("/")); + separator = rb_obj_freeze(rb_usascii_str_new2("/")); rb_define_const(rb_cFile, "Separator", separator); rb_define_const(rb_cFile, "SEPARATOR", separator); rb_define_singleton_method(rb_cFile, "split", rb_file_s_split, 1); rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -2); #ifdef DOSISH - rb_define_const(rb_cFile, "ALT_SEPARATOR", rb_obj_freeze(rb_str_new2("\\"))); + rb_define_const(rb_cFile, "ALT_SEPARATOR", rb_obj_freeze(rb_usascii_str_new2("\\"))); #else rb_define_const(rb_cFile, "ALT_SEPARATOR", Qnil); #endif diff --git a/hash.c b/hash.c index 430ebac779..707f8a676f 100644 --- a/hash.c +++ b/hash.c @@ -1169,7 +1169,7 @@ inspect_hash(VALUE hash, VALUE dummy, int recur) { VALUE str; - if (recur) return rb_str_new2("{...}"); + if (recur) return rb_usascii_str_new2("{...}"); str = rb_str_buf_new2("{"); rb_hash_foreach(hash, inspect_i, str); rb_str_buf_cat2(str, "}"); @@ -1193,7 +1193,7 @@ static VALUE rb_hash_inspect(VALUE hash) { if 
(RHASH_EMPTY_P(hash)) - return rb_str_new2("{}"); + return rb_usascii_str_new2("{}"); return rb_exec_recursive(inspect_hash, hash, 0); } @@ -1821,7 +1821,7 @@ rb_f_getenv(VALUE obj, VALUE name) if (strcmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted()) #endif { - VALUE str = rb_str_new2(env); + VALUE str = rb_usascii_str_new2(env); rb_obj_freeze(str); return str; @@ -1862,7 +1862,7 @@ env_fetch(int argc, VALUE *argv) #else if (strcmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted()) #endif - return rb_str_new2(env); + return rb_usascii_str_new2(env); return env_str_new2(env); } @@ -2217,7 +2217,7 @@ env_clear(void) static VALUE env_to_s(void) { - return rb_str_new2("ENV"); + return rb_usascii_str_new2("ENV"); } static VALUE @@ -2239,7 +2239,7 @@ env_inspect(void) rb_str_buf_cat2(str, "\""); rb_str_buf_cat(str, *env, s-*env); rb_str_buf_cat2(str, "\"=>"); - i = rb_inspect(rb_str_new2(s+1)); + i = rb_inspect(rb_usascii_str_new2(s+1)); rb_str_buf_append(str, i); } env++; diff --git a/numeric.c b/numeric.c index f6376c3e18..c3b169a70e 100644 --- a/numeric.c +++ b/numeric.c @@ -504,9 +504,9 @@ flo_to_s(VALUE flt) char *p, *e; if (isinf(value)) - return rb_str_new2(value < 0 ? "-Infinity" : "Infinity"); + return rb_usascii_str_new2(value < 0 ? 
"-Infinity" : "Infinity"); else if(isnan(value)) - return rb_str_new2("NaN"); + return rb_usascii_str_new2("NaN"); sprintf(buf, "%#.15g", value); /* ensure to print decimal point */ if (!(e = strchr(buf, 'e'))) { @@ -522,7 +522,7 @@ flo_to_s(VALUE flt) while (p[-1]=='0' && ISDIGIT(p[-2])) p--; memmove(p, e, strlen(e)+1); - return rb_str_new2(buf); + return rb_usascii_str_new2(buf); } /* @@ -1851,7 +1851,12 @@ int_chr(int argc, VALUE *argv, VALUE num) rb_raise(rb_eRangeError, "%ld out of char range", i); } c = i; - return rb_str_new(&c, 1); + if (i < 0x80) { + return rb_usascii_str_new(&c, 1); + } + else { + return rb_str_new(&c, 1); + } case 1: break; default: @@ -1968,7 +1973,7 @@ rb_fix2str(VALUE x, int base) rb_raise(rb_eArgError, "invalid radix %d", base); } if (val == 0) { - return rb_str_new2("0"); + return rb_usascii_str_new2("0"); } if (val < 0) { val = -val; @@ -1982,7 +1987,7 @@ rb_fix2str(VALUE x, int base) *--b = '-'; } - return rb_str_new2(b); + return rb_usascii_str_new2(b); } /* diff --git a/object.c b/object.c index ddf02b298b..7970c51171 100644 --- a/object.c +++ b/object.c @@ -806,7 +806,7 @@ nil_to_f(VALUE obj) static VALUE nil_to_s(VALUE obj) { - return rb_str_new2(""); + return rb_str_new(0, 0); } /* @@ -836,7 +836,7 @@ nil_to_a(VALUE obj) static VALUE nil_inspect(VALUE obj) { - return rb_str_new2("nil"); + return rb_usascii_str_new2("nil"); } /*********************************************************************** @@ -859,7 +859,7 @@ nil_inspect(VALUE obj) static VALUE true_to_s(VALUE obj) { - return rb_str_new2("true"); + return rb_usascii_str_new2("true"); } @@ -936,7 +936,7 @@ true_xor(VALUE obj, VALUE obj2) static VALUE false_to_s(VALUE obj) { - return rb_str_new2("false"); + return rb_usascii_str_new2("false"); } /* @@ -1090,7 +1090,7 @@ static VALUE rb_mod_to_s(VALUE klass) { if (FL_TEST(klass, FL_SINGLETON)) { - VALUE s = rb_str_new2("#<"); + VALUE s = rb_usascii_str_new2("#<"); VALUE v = rb_iv_get(klass, "__attached__"); rb_str_cat2(s, 
"Class:"); diff --git a/string.c b/string.c index 1391ac3164..684f88025a 100644 --- a/string.c +++ b/string.c @@ -278,6 +278,9 @@ str_new(VALUE klass, const char *ptr, long len) if (ptr) { memcpy(RSTRING_PTR(str), ptr, len); } + else { + ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + } STR_SET_LEN(str, len); RSTRING_PTR(str)[len] = '\0'; return str; @@ -289,6 +292,15 @@ rb_str_new(const char *ptr, long len) return str_new(rb_cString, ptr, len); } +VALUE +rb_usascii_str_new(const char *ptr, long len) +{ + VALUE str = str_new(rb_cString, ptr, len); + + ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + return str; +} + VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc) { @@ -307,6 +319,15 @@ rb_str_new2(const char *ptr) return rb_str_new(ptr, strlen(ptr)); } +VALUE +rb_usascii_str_new2(const char *ptr) +{ + if (!ptr) { + rb_raise(rb_eArgError, "NULL pointer given"); + } + return rb_usascii_str_new(ptr, strlen(ptr)); +} + VALUE rb_tainted_str_new(const char *ptr, long len) { diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index dbeb1c10af..2552fd2272 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -962,4 +962,55 @@ class TestM17N < Test::Unit::TestCase assert_equal(Encoding::ASCII_8BIT, v.encoding) } end + + def test_empty_string + assert_equal("".encoding, Encoding::US_ASCII) + end + + def test_nil_to_s + assert_equal(nil.to_s.encoding, Encoding::US_ASCII) + end + + def test_nil_inspect + assert_equal(nil.inspect.encoding, Encoding::US_ASCII) + end + + def test_true_to_s + assert_equal(true.to_s.encoding, Encoding::US_ASCII) + end + + def test_false_to_s + assert_equal(false.to_s.encoding, Encoding::US_ASCII) + end + + def test_fixnum_to_s + assert_equal(1.to_s.encoding, Encoding::US_ASCII) + end + + def test_float_to_s + assert_equal(1.0.to_s.encoding, Encoding::US_ASCII) + end + + def test_bignum_to_s + assert_equal((1<<129).to_s.encoding, Encoding::US_ASCII) + end + + 
def test_array_to_s + assert_equal([].to_s.encoding, Encoding::US_ASCII) + assert_equal([nil].to_s.encoding, Encoding::US_ASCII) + assert_equal([1].to_s.encoding, Encoding::US_ASCII) + assert_equal([""].to_s.encoding, Encoding::US_ASCII) + assert_equal(["a"].to_s.encoding, Encoding::US_ASCII) + assert_equal([nil,1,"","a","\x20",[]].to_s.encoding, Encoding::US_ASCII) + end + + def test_hash_to_s + assert_equal({}.to_s.encoding, Encoding::US_ASCII) + assert_equal({1=>nil,"foo"=>""}.to_s.encoding, Encoding::US_ASCII) + end + + def test_encoding_to_s + assert_equal(Encoding::US_ASCII.to_s.encoding, Encoding::US_ASCII) + assert_equal(Encoding::US_ASCII.inspect.encoding, Encoding::US_ASCII) + end end