diff --git a/ChangeLog b/ChangeLog index 9aac6984e2..3f1a804b24 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Sat Oct 22 13:08:21 2005 Nobuyoshi Nakada + + * object.c (sym_inspect), parse.y (parser_yylex, rb_symname_p): check + if valid as a symbol name more strictly. [ruby-dev:27478] + + * test/ruby/test_symbol.rb: tests for [ruby-core:03573]. + Sat Oct 22 10:08:28 2005 Hirokazu Yamamoto * missing.h, missing/*.c: SUSv3 compatible strcasecmp and strncasecmp, diff --git a/intern.h b/intern.h index 4df7106a7a..ec08a3f5fa 100644 --- a/intern.h +++ b/intern.h @@ -398,6 +398,7 @@ int rb_is_instance_id(ID); int rb_is_class_id(ID); int rb_is_local_id(ID); int rb_is_junk_id(ID); +int rb_symname_p(const char*); VALUE rb_backref_get(void); void rb_backref_set(VALUE); VALUE rb_lastline_get(void); diff --git a/object.c b/object.c index 9f39ec3117..825c4e627b 100644 --- a/object.c +++ b/object.c @@ -1053,7 +1053,7 @@ sym_inspect(VALUE sym) str = rb_str_new(0, strlen(name)+1); RSTRING(str)->ptr[0] = ':'; strcpy(RSTRING(str)->ptr+1, name); - if (rb_is_junk_id(id)) { + if (!rb_symname_p(name)) { str = rb_str_dump(str); strncpy(RSTRING(str)->ptr, ":\"", 2); } diff --git a/parse.y b/parse.y index 4760335765..55168adfd1 100644 --- a/parse.y +++ b/parse.y @@ -5587,6 +5587,7 @@ parser_yylex(struct parser_params *parser) int space_seen = 0; int cmd_state; unsigned char uc; + enum lex_state_e last_state; #ifdef RIPPER int fallthru = Qfalse; #endif @@ -6556,6 +6557,7 @@ parser_yylex(struct parser_params *parser) return '%'; case '$': + last_state = lex_state; lex_state = EXPR_END; newtok(); c = nextc(); @@ -6599,7 +6601,14 @@ parser_yylex(struct parser_params *parser) tokadd('$'); tokadd(c); c = nextc(); - tokadd(c); + uc = (unsigned char)c; + if (is_identchar(uc)) { + tokadd(c); + } + else { + pushback(c); + } + gvar: tokfix(); set_yylval_id(rb_intern(tok())); if (!is_global_id(yylval_id())) { @@ -6612,6 +6621,11 @@ parser_yylex(struct parser_params *parser) case '`': /* $`: string before last match */ case '\'': /* $': string after last match */ case '+': /* $+: string matches last paren. */ + if (last_state == EXPR_FNAME) { + tokadd('$'); + tokadd(c); + goto gvar; + } set_yylval_node(NEW_BACK_REF(c)); return tBACK_REF; @@ -6624,6 +6638,7 @@ parser_yylex(struct parser_params *parser) c = nextc(); } while (ISDIGIT(c)); pushback(c); + if (last_state == EXPR_FNAME) goto gvar; tokfix(); set_yylval_node(NEW_NTH_REF(atoi(tok()+1))); return tNTH_REF; @@ -6712,8 +6727,8 @@ parser_yylex(struct parser_params *parser) { int result = 0; - enum lex_state_e last_state = lex_state; + last_state = lex_state; switch (tok()[0]) { case '$': lex_state = EXPR_END; @@ -8285,6 +8300,97 @@ internal_id(void) return ID_INTERNAL | (++global_symbols.last_id << ID_SCOPE_SHIFT); } +static int +is_special_global_name(const char *m) +{ + switch (*m) { + case '~': case '*': case '$': case '?': case '!': case '@': + case '/': case '\\': case ';': case ',': case '.': case '=': + case ':': case '<': case '>': case '\"': + case '&': case '`': case '\'': case '+': + case '0': + ++m; + break; + case '-': + ++m; + if (is_identchar(*m)) m += mbclen(*m); + break; + default: + if (!ISDIGIT(*m)) return 0; + do ++m; while (ISDIGIT(*m)); + } + return !*m; +} + +int +rb_symname_p(const char *name) +{ + const char *m = name; + int localid = Qfalse; + + if (!m) return Qfalse; + switch (*m) { + case '\0': + return Qfalse; + + case '$': + if (is_special_global_name(++m)) return Qtrue; + goto id; + + case '@': + if (*++m == '@') ++m; + goto id; + + case '<': + switch (*++m) { + case '<': ++m; break; + case '=': if (*++m == '>') ++m; break; + default: break; + } + break; + + case '>': + if (*++m == '>') ++m; + break; + + case '=': + switch (*++m) { + case '~': ++m; break; + case '=': if (*++m == '=') ++m; break; + default: return Qfalse; + } + break; + + case '*': + if (*++m == '*') ++m; + break; + + case '+': case '-': + if (*++m == '@') ++m; + break; + + case '|': case '^': case '&': case '/': case '%': case '~': case '`': + break; + + case '[': + if (*++m == ']' && *++m == '=') ++m; + break; + + default: + localid = !ISUPPER(*m); + id: + if (*m != '_' && !ISALPHA(*m) && !ismbchar(*m)) return Qfalse; + while (is_identchar(*m)) m += mbclen(*m); + if (localid) { + switch (*m) { + case '!': case '?': case '=': ++m; + } + } + break; + } + return *m ? Qfalse : Qtrue; +} + ID rb_intern(const char *name) { @@ -8300,8 +8406,7 @@ rb_intern(const char *name) switch (*name) { case '$': id |= ID_GLOBAL; - m++; - if (!is_identchar(*m)) m++; + if (is_special_global_name(++m)) goto new_id; break; case '@': if (name[1] == '@') { @@ -8314,7 +8419,7 @@ rb_intern(const char *name) m++; break; default: - if (name[0] != '_' && !ISALPHA(name[0]) && !ismbchar(name[0])) { + if (name[0] != '_' && ISASCII(name[0]) && !ISALNUM(name[0])) { /* operators */ int i; @@ -8348,10 +8453,13 @@ rb_intern(const char *name) } break; } - while (m <= name + last && is_identchar(*m)) { - m += mbclen(*m); + if (!ISDIGIT(*m)) { + while (m <= name + last && is_identchar(*m)) { + m += mbclen(*m); + } } if (*m) id = ID_JUNK; + new_id: id |= ++global_symbols.last_id << ID_SCOPE_SHIFT; id_regist: name = strdup(name); diff --git a/test_symbol.rb b/test_symbol.rb new file mode 100644 index 0000000000..b6dd203596 --- /dev/null +++ b/test_symbol.rb @@ -0,0 +1,50 @@ +require 'test/unit' + +class TestSymbol < Test::Unit::TestCase + # [ruby-core:3573] + + def assert_eval_inspected(sym) + n = sym.inspect + assert_nothing_raised(SyntaxError) {assert_equal(sym, eval(n))} + end + + def test_inspect_invalid + # 2) Symbol#inspect sometimes returns invalid symbol representations: + assert_eval_inspected(:"!") + assert_eval_inspected(:"=") + assert_eval_inspected(:"0") + assert_eval_inspected(:"$1") + assert_eval_inspected(:"@1") + assert_eval_inspected(:"@@1") + assert_eval_inspected(:"@") + assert_eval_inspected(:"@@") + end + + def assert_inspect_evaled(n) + assert_nothing_raised(SyntaxError) {assert_equal(n, eval(n).inspect)} + end + + def test_inspect_suboptimal + # 3) Symbol#inspect sometimes returns suboptimal symbol representations: + assert_inspect_evaled(':foo') + assert_inspect_evaled(':foo!') + assert_inspect_evaled(':bar?') + end + + def test_inspect_dollar + # 4) :$- always treats next character literally: + sym = "$-".intern + assert_nothing_raised(SyntaxError) {assert_equal(sym, eval(':$-'))} + assert_nothing_raised(SyntaxError) {assert_equal(sym, eval(":$-\n"))} + assert_nothing_raised(SyntaxError) {assert_equal(sym, eval(":$- "))} + assert_nothing_raised(SyntaxError) {assert_equal(sym, eval(":$-#"))} + assert_raise(SyntaxError) {eval ':$-('} + end + + def test_inspect_number + # 5) Inconsistency between :$0 and :$1? The first one is valid, but the + # latter isn't. + assert_inspect_evaled(':$0') + assert_inspect_evaled(':$1') + end +end