From ccf5372b252a229ddb6693db132edc013419ccdc Mon Sep 17 00:00:00 2001 From: matz Date: Sat, 2 Sep 2006 15:05:27 +0000 Subject: [PATCH] * object.c (Init_Object): move symbol related code to string.c * string.c (Init_String): Symbol as subclass of String. * parse.y (rb_intern2): handle symbol as strings. * string.c (str_new): substring of symbols are mere strings, not symbols. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10834 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 18 +++++ class.c | 2 +- gc.c | 1 + object.c | 198 ------------------------------------------------- parse.y | 75 +++++++++++++++---- ruby.h | 10 ++- string.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 301 insertions(+), 219 deletions(-) diff --git a/ChangeLog b/ChangeLog index cb8b172f8f..da69b4cf0a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +Sat Sep 2 23:53:28 2006 Yukihiro Matsumoto + + * object.c (Init_Object): move symbol related code to string.c + + * string.c (Init_String): Symbol as subclass of String. + + * parse.y (rb_intern2): handle symbol as strings. + + * string.c (str_new): substring of symbols are mere strings, not + symbols. + Sat Sep 2 23:37:29 2006 Yukihiro Matsumoto * ruby.h (struct RArray): embed small arrays. @@ -17,6 +28,13 @@ Sat Sep 2 12:06:35 2006 NAKAMURA, Hiroshi XML attribute which value is nil. value "" and nil both were dumped as 'attr="value"'. [ruby-dev:29395] +Sat Sep 2 11:47:58 2006 Yukihiro Matsumoto + + * eval.c (rb_eval): should handle when in else clause. a patch + from Eric Hodel . [ruby-core:08662] + + * parse.y (primary): wrap with NODE_CASE. [ruby-core:08663] + Sat Sep 2 12:00:32 2006 NAKAMURA, Hiroshi * lib/csv.rb (CSV::IOReader#initialize): use String#[](pos, len) diff --git a/class.c b/class.c index c7aa72344d..81956e5feb 100644 --- a/class.c +++ b/class.c @@ -489,7 +489,7 @@ ins_methods_push(ID name, long type, VALUE ary, long visi) break; } if (visi) { - rb_ary_push(ary, rb_str_new2(rb_id2name(name))); + rb_ary_push(ary, ID2SYM(name)); } return ST_CONTINUE; } diff --git a/gc.c b/gc.c index 0ae9bb62a0..8681175191 100644 --- a/gc.c +++ b/gc.c @@ -1381,6 +1381,7 @@ garbage_collect(void) (VALUE*)((char*)rb_gc_stack_start + 2)); #endif rb_gc_mark_threads(); + rb_gc_mark_symbols(); /* mark protected global variables */ for (list = global_List; list; list = list->next) { diff --git a/object.c b/object.c index 51a173658b..22f9ca7da5 100644 --- a/object.c +++ b/object.c @@ -30,7 +30,6 @@ VALUE rb_cData; VALUE rb_cNilClass; VALUE rb_cTrueClass; VALUE rb_cFalseClass; -VALUE rb_cSymbol; static ID id_eq, id_eql, id_inspect, id_init_copy; @@ -938,149 +937,6 @@ rb_obj_pattern_match(VALUE obj1, VALUE obj2) return Qnil; } -/********************************************************************** - * Document-class: Symbol - * - * Symbol objects represent names and some strings - * inside the Ruby - * interpreter. They are generated using the :name and - * :"string" literals - * syntax, and by the various to_sym methods. The same - * Symbol object will be created for a given name or string - * for the duration of a program's execution, regardless of the context - * or meaning of that name. Thus if Fred is a constant in - * one context, a method in another, and a class in a third, the - * Symbol :Fred will be the same object in - * all three contexts. - * - * module One - * class Fred - * end - * $f1 = :Fred - * end - * module Two - * Fred = 1 - * $f2 = :Fred - * end - * def Fred() - * end - * $f3 = :Fred - * $f1.id #=> 2514190 - * $f2.id #=> 2514190 - * $f3.id #=> 2514190 - * - */ - -/* - * call-seq: - * sym.to_i => fixnum - * - * Returns an integer that is unique for each symbol within a - * particular execution of a program. - * - * :fred.to_i #=> 9809 - * "fred".to_sym.to_i #=> 9809 - */ - -static VALUE -sym_to_i(VALUE sym) -{ - ID id = SYM2ID(sym); - - return LONG2FIX(id); -} - - -/* - * call-seq: - * sym.inspect => string - * - * Returns the representation of sym as a symbol literal. - * - * :fred.inspect #=> ":fred" - */ - -static VALUE -sym_inspect(VALUE sym) -{ - VALUE str; - const char *name; - ID id = SYM2ID(sym); - - name = rb_id2name(id); - str = rb_str_new(0, strlen(name)+1); - RSTRING_PTR(str)[0] = ':'; - strcpy(RSTRING_PTR(str)+1, name); - if (!rb_symname_p(name)) { - str = rb_str_dump(str); - strncpy(RSTRING_PTR(str), ":\"", 2); - } - return str; -} - - -/* - * call-seq: - * sym.id2name => string - * sym.to_s => string - * - * Returns the name or string corresponding to sym. - * - * :fred.id2name #=> "fred" - */ - - -static VALUE -sym_to_s(VALUE sym) -{ - return rb_str_new2(rb_id2name(SYM2ID(sym))); -} - - -/* - * call-seq: - * sym.to_sym => sym - * - * In general, to_sym returns the Symbol corresponding - * to an object. As sym is already a symbol, self is returned - * in this case. - */ - -static VALUE -sym_to_sym(VALUE sym) -{ - return sym; -} - -static VALUE -sym_call(VALUE args, VALUE sym) -{ - VALUE obj; - - if (RARRAY_LEN(args) < 1) { - rb_raise(rb_eArgError, "no receiver given"); - } - obj = RARRAY_PTR(args)[0]; - return rb_funcall3(obj, (ID)sym, - RARRAY_LEN(args) - 1, - RARRAY_PTR(args) + 1); -} - -/* - * call-seq: - * sym.to_proc - * - * Returns a _Proc_ object which respond to the given method by _sym_. - * - * (1..3).collect(&:to_s) #=> ["1", "2", "3"] - */ - -static VALUE -sym_to_proc(VALUE sym) -{ - return rb_proc_new(sym_call, (VALUE)SYM2ID(sym)); -} - /*********************************************************************** * @@ -1473,47 +1329,6 @@ rb_class_superclass(VALUE klass) return super; } -static ID -str_to_id(VALUE str) -{ - if (!RSTRING_PTR(str) || RSTRING_LEN(str) == 0) { - rb_raise(rb_eArgError, "empty symbol string"); - } - if (RSTRING_LEN(str) != strlen(RSTRING_PTR(str))) { - rb_raise(rb_eArgError, "Symbols should not contain NUL (\\0)"); - } - return rb_intern(RSTRING_PTR(str)); -} - -ID -rb_to_id(VALUE name) -{ - VALUE tmp; - ID id; - - switch (TYPE(name)) { - case T_STRING: - return str_to_id(name); - case T_FIXNUM: - rb_warn("do not use Fixnums as Symbols"); - id = FIX2LONG(name); - if (!rb_id2name(id)) { - rb_raise(rb_eArgError, "%ld is not a symbol", id); - } - break; - case T_SYMBOL: - id = SYM2ID(name); - break; - default: - tmp = rb_check_string_type(name); - if (!NIL_P(tmp)) { - return str_to_id(tmp); - } - rb_raise(rb_eTypeError, "%s is not a symbol", RSTRING_PTR(rb_inspect(name))); - } - return id; -} - /* * call-seq: * attr_reader(symbol, ...) => nil @@ -2449,19 +2264,6 @@ Init_Object(void) rb_undef_method(CLASS_OF(rb_cNilClass), "new"); rb_define_global_const("NIL", Qnil); - rb_cSymbol = rb_define_class("Symbol", rb_cObject); - rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */ - rb_undef_alloc_func(rb_cSymbol); - rb_undef_method(CLASS_OF(rb_cSymbol), "new"); - - rb_define_method(rb_cSymbol, "to_i", sym_to_i, 0); - rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0); - rb_define_method(rb_cSymbol, "to_s", sym_to_s, 0); - rb_define_method(rb_cSymbol, "id2name", sym_to_s, 0); - rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0); - rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0); - rb_define_method(rb_cSymbol, "===", rb_obj_equal, 1); - rb_define_method(rb_cModule, "freeze", rb_mod_freeze, 0); rb_define_method(rb_cModule, "===", rb_mod_eqq, 1); rb_define_method(rb_cModule, "==", rb_obj_equal, 1); diff --git a/parse.y b/parse.y index d1984259bd..34d1183777 100644 --- a/parse.y +++ b/parse.y @@ -8255,15 +8255,26 @@ static const struct { static struct symbols { ID last_id; - st_table *tbl; - st_table *rev; + st_table *sym_id; + st_table *id_sym; } global_symbols = {tLAST_TOKEN}; +static struct st_hash_type symhash = { + rb_str_cmp, + rb_str_hash, +}; + void Init_sym(void) { - global_symbols.tbl = st_init_strtable_with_size(200); - global_symbols.rev = st_init_numtable_with_size(200); + global_symbols.sym_id = st_init_table_with_size(&symhash, 1000); + global_symbols.id_sym = st_init_numtable_with_size(1000); +} + +void +rb_gc_mark_symbols(int lev) +{ + rb_mark_tbl(global_symbols.id_sym); } static ID @@ -8368,16 +8379,17 @@ rb_symname_p(const char *name) } ID -rb_intern(const char *name) +rb_intern2(const char *name, long len) { const char *m = name; + VALUE sym = rb_str_new(name, len); ID id; int last; - if (st_lookup(global_symbols.tbl, (st_data_t)name, (st_data_t *)&id)) + if (st_lookup(global_symbols.sym_id, (st_data_t)sym, (st_data_t *)&id)) return id; - last = strlen(name)-1; + last = len-1; id = 0; switch (*name) { case '$': @@ -8438,12 +8450,42 @@ rb_intern(const char *name) new_id: id |= ++global_symbols.last_id << ID_SCOPE_SHIFT; id_register: - name = strdup(name); - st_add_direct(global_symbols.tbl, (st_data_t)name, id); - st_add_direct(global_symbols.rev, id, (st_data_t)name); + RBASIC(sym)->klass = rb_cSymbol; + OBJ_FREEZE(sym); + st_add_direct(global_symbols.sym_id, (st_data_t)sym, id); + st_add_direct(global_symbols.id_sym, id, (st_data_t)sym); return id; } +ID +rb_intern(const char *name) +{ + return rb_intern2(name, strlen(name)); +} + +VALUE +rb_id2sym(ID id) +{ + VALUE data; + + if (st_lookup(global_symbols.id_sym, id, &data)) { + if (!RBASIC(data)->klass) { + RBASIC(data)->klass = rb_cSymbol; + } + return data; + } +} + +ID +rb_sym2id(VALUE sym) +{ + ID data; + + if (st_lookup(global_symbols.sym_id, sym, &data)) + return data; + return rb_intern2(RSTRING_PTR(sym), RSTRING_LEN(sym)); +} + const char * rb_id2name(ID id) { @@ -8459,8 +8501,8 @@ rb_id2name(ID id) } } - if (st_lookup(global_symbols.rev, id, &data)) - return (char *)data; + if (st_lookup(global_symbols.id_sym, id, &data)) + return RSTRING_PTR(data); if (is_attrset_id(id)) { ID id2 = (id & ~ID_SCOPE_MASK) | ID_LOCAL; @@ -8484,9 +8526,9 @@ rb_id2name(ID id) } static int -symbols_i(char *key, ID value, VALUE ary) +symbols_i(VALUE sym, ID value, VALUE ary) { - rb_ary_push(ary, ID2SYM(value)); + rb_ary_push(ary, sym); return ST_CONTINUE; } @@ -8509,9 +8551,9 @@ symbols_i(char *key, ID value, VALUE ary) VALUE rb_sym_all_symbols(void) { - VALUE ary = rb_ary_new2(global_symbols.tbl->num_entries); + VALUE ary = rb_ary_new2(global_symbols.sym_id->num_entries); - st_foreach(global_symbols.tbl, symbols_i, ary); + st_foreach(global_symbols.sym_id, symbols_i, ary); return ary; } @@ -9261,3 +9303,4 @@ Init_ripper(void) rb_intern("&&"); } #endif /* RIPPER */ + diff --git a/ruby.h b/ruby.h index b9943fdd80..c6cf32186d 100644 --- a/ruby.h +++ b/ruby.h @@ -195,10 +195,11 @@ VALUE rb_ull2inum(unsigned LONG_LONG); #define IMMEDIATE_MASK 0x03 #define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK) -#define SYMBOL_FLAG 0x0e -#define SYMBOL_P(x) (((VALUE)(x)&0xff)==SYMBOL_FLAG) -#define ID2SYM(x) ((VALUE)(((long)(x))<<8|SYMBOL_FLAG)) -#define SYM2ID(x) RSHIFT((VALUE)x,8) +#define SYMBOL_P(x) (!IMMEDIATE_P(x) && RBASIC(x)->klass == rb_cSymbol) +VALUE rb_id2sym(ID); +ID rb_sym2id(VALUE); +#define ID2SYM(x) rb_id2sym(x) +#define SYM2ID(x) rb_sym2id(x) /* special contants - i.e. non-zero and non-fixnum constants */ #define Qfalse ((VALUE)0) @@ -596,6 +597,7 @@ void rb_gc_register_address(VALUE*); void rb_gc_unregister_address(VALUE*); ID rb_intern(const char*); +ID rb_intern2(const char*, long); const char *rb_id2name(ID); ID rb_to_id(VALUE); diff --git a/string.c b/string.c index 05d0d32325..8bb3158917 100644 --- a/string.c +++ b/string.c @@ -26,6 +26,7 @@ #endif VALUE rb_cString; +VALUE rb_cSymbol; #define STR_TMPLOCK FL_USER7 #define STR_NOEMBED FL_USER1 @@ -134,6 +135,7 @@ str_new(VALUE klass, const char *ptr, long len) rb_raise(rb_eArgError, "negative string size (or size too big)"); } + if (klass == rb_cSymbol) klass = rb_cString; str = str_alloc(klass); if (len > RSTRING_EMBED_LEN_MAX) { RSTRING(str)->as.heap.aux.capa = len; @@ -4369,6 +4371,207 @@ rb_str_setter(VALUE val, ID id, VALUE *var) } +/********************************************************************** + * Document-class: Symbol + * + * Symbol objects represent names and some strings + * inside the Ruby + * interpreter. They are generated using the :name and + * :"string" literals + * syntax, and by the various to_sym methods. The same + * Symbol object will be created for a given name or string + * for the duration of a program's execution, regardless of the context + * or meaning of that name. Thus if Fred is a constant in + * one context, a method in another, and a class in a third, the + * Symbol :Fred will be the same object in + * all three contexts. + * + * module One + * class Fred + * end + * $f1 = :Fred + * end + * module Two + * Fred = 1 + * $f2 = :Fred + * end + * def Fred() + * end + * $f3 = :Fred + * $f1.id #=> 2514190 + * $f2.id #=> 2514190 + * $f3.id #=> 2514190 + * + */ + + +/* + * call-seq: + * Symbol.new(str) => new_sym + * Symbol.intern(str) => new_sym + * + * Returns a new symbol corresponding to str. + */ + +static VALUE +rb_sym_s_intern(VALUE s) +{ + if (rb_class_real(s) == rb_cSymbol) { + return s; + } + StringValue(s); + return rb_intern2(RSTRING_PTR(s), RSTRING_LEN(s)); +} + +/* + * call-seq: + * sym.to_i => fixnum + * + * Returns an integer that is unique for each symbol within a + * particular execution of a program. + * + * :fred.to_i #=> 9809 + * "fred".to_sym.to_i #=> 9809 + */ + +static VALUE +sym_to_i(VALUE sym) +{ + ID id = SYM2ID(sym); + + return LONG2FIX(id); +} + + +/* + * call-seq: + * sym.inspect => string + * + * Returns the representation of sym as a symbol literal. + * + * :fred.inspect #=> ":fred" + */ + +static VALUE +sym_inspect(VALUE sym) +{ + VALUE str; + + str = rb_str_new(0, RSTRING_LEN(sym)+1); + RSTRING_PTR(str)[0] = ':'; + memcpy(RSTRING_PTR(str)+1, RSTRING_PTR(sym), RSTRING_LEN(sym)); + if (!rb_symname_p(RSTRING_PTR(sym))) { + str = rb_str_dump(str); + strncpy(RSTRING_PTR(str), ":\"", 2); + } + return str; +} + + +/* + * call-seq: + * sym.id2name => string + * sym.to_s => string + * + * Returns the name or string corresponding to sym. + * + * :fred.id2name #=> "fred" + */ + + +static VALUE +sym_to_s(VALUE sym) +{ + return rb_str_new(RSTRING_PTR(sym), RSTRING_LEN(sym)); +} + + +/* + * call-seq: + * sym.to_sym => sym + * sym.intern => sym + * + * In general, to_sym returns the Symbol corresponding + * to an object. As sym is already a symbol, self is returned + * in this case. + */ + +static VALUE +sym_to_sym(VALUE sym) +{ + return sym; +} + +static VALUE +sym_call(VALUE args, VALUE sym) +{ + VALUE obj; + + if (RARRAY_LEN(args) < 1) { + rb_raise(rb_eArgError, "no receiver given"); + } + obj = RARRAY_PTR(args)[0]; + return rb_funcall3(obj, (ID)sym, + RARRAY_LEN(args) - 1, + RARRAY_PTR(args) + 1); +} + +/* + * call-seq: + * sym.to_proc + * + * Returns a _Proc_ object which respond to the given method by _sym_. + * + * (1..3).collect(&:to_s) #=> ["1", "2", "3"] + */ + +static VALUE +sym_to_proc(VALUE sym) +{ + return rb_proc_new(sym_call, (VALUE)SYM2ID(sym)); +} + + +static ID +str_to_id(VALUE str) +{ + if (!RSTRING_PTR(str) || RSTRING_LEN(str) == 0) { + rb_raise(rb_eArgError, "empty symbol string"); + } + if (RBASIC(str)->klass == rb_cSymbol) + return str; + return rb_intern2(RSTRING_PTR(str), RSTRING_LEN(str)); +} + +ID +rb_to_id(VALUE name) +{ + VALUE tmp; + ID id; + + switch (TYPE(name)) { + case T_STRING: + return str_to_id(name); + case T_FIXNUM: + rb_warn("do not use Fixnums as Symbols"); + id = FIX2LONG(name); + if (!rb_id2name(id)) { + rb_raise(rb_eArgError, "%ld is not a symbol", id); + } + break; + case T_SYMBOL: + id = SYM2ID(name); + break; + default: + tmp = rb_check_string_type(name); + if (!NIL_P(tmp)) { + return str_to_id(tmp); + } + rb_raise(rb_eTypeError, "%s is not a symbol", RSTRING_PTR(rb_inspect(name))); + } + return id; +} + /* * A String object holds and manipulates an arbitrary sequence of * bytes, typically representing characters. String objects may be created @@ -4496,4 +4699,17 @@ Init_String(void) rb_fs = Qnil; rb_define_variable("$;", &rb_fs); rb_define_variable("$-F", &rb_fs); + + rb_cSymbol = rb_define_class("Symbol", rb_cString); + rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */ + rb_define_singleton_method(rb_cSymbol, "intern", rb_sym_s_intern, 1); + rb_define_singleton_method(rb_cSymbol, "new", rb_sym_s_intern, 1); + + rb_define_method(rb_cSymbol, "to_i", sym_to_i, 0); + rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0); + rb_define_method(rb_cSymbol, "to_s", sym_to_s, 0); + rb_define_method(rb_cSymbol, "id2name", sym_to_s, 0); + rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0); + rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0); + rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0); }