From 34387d1a0273329ec0c9cf02c098b12552f4302b Mon Sep 17 00:00:00 2001 From: usa Date: Mon, 28 Jan 2008 18:04:39 +0000 Subject: [PATCH] * parse.y (reg_fragment_setenc_gen): special-case regexp fragments in scripts whose source encoding is US-ASCII. * parse.y (reg_fragment_check_gen, reg_compile_gen): the RE_OPTION_ARG_ENCODING_NONE trick is no longer needed. [ruby-dev:33399] * test/ruby/test_m17n.rb (assert_regexp_usascii_literal, test_regexp_usascii): add tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15304 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 10 ++++++++++ parse.y | 17 +++++++++-------- test/ruby/test_m17n.rb | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 46cded4c9c..903ba805ca 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +Tue Jan 29 03:01:29 2008 NAKAMURA Usaku + + * parse.y (reg_fragment_setenc_gen): US-ASCII script special code. + + * parse.y (reg_fragment_check_len, reg_compile_gen): no need such + trick. + [ruby-dev:33399] + + * test/ruby/test_m17n.rb (test_regexp_usacii_literal): add tests. 
+ Tue Jan 29 01:38:02 2008 NAKAMURA Usaku * common.mk ($(srcdir)/revision.h): no need to show ifchange execution diff --git a/parse.y b/parse.y index a6fed50670..3718d99a2b 100644 --- a/parse.y +++ b/parse.y @@ -8501,6 +8501,15 @@ reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options) } rb_enc_associate(str, rb_ascii8bit_encoding()); } + else if (parser->enc == rb_usascii_encoding()) { + if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { + /* raise in re.c */ + rb_enc_associate(str, rb_usascii_encoding()); + } + else { + rb_enc_associate(str, rb_ascii8bit_encoding()); + } + } return; error: @@ -8513,10 +8522,6 @@ static void reg_fragment_check_gen(struct parser_params* parser, VALUE str, int options) { VALUE err; - if (!RE_OPTION_ENCODING_IDX(options) && - parser->enc == rb_usascii_encoding()) { - options |= RE_OPTION_ARG_ENCODING_NONE; - } reg_fragment_setenc_gen(parser, str, options); err = rb_reg_check_preprocess(str); if (err != Qnil) { @@ -8610,10 +8615,6 @@ reg_compile_gen(struct parser_params* parser, VALUE str, int options) { VALUE re; - if (!RE_OPTION_ENCODING_IDX(options) && - parser->enc == rb_usascii_encoding()) { - options |= RE_OPTION_ARG_ENCODING_NONE; - } reg_fragment_setenc(str, options); re = rb_reg_compile(str, options & RE_OPTION_MASK); if (NIL_P(re)) { diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index c9d7d8f46e..056f4f3df5 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -78,6 +78,15 @@ class TestM17N < Test::Unit::TestCase assert_regexp_fixed_encoding(r) end + def assert_regexp_usascii_literal(r, enc, ex = nil) + code = "# -*- encoding: US-ASCII -*-\n#{r}.encoding" + if ex + assert_raise(ex) { eval(code) } + else + assert_equal(enc, eval(code)) + end + end + def encdump(str) d = str.dump if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d @@ -1045,4 +1054,32 @@ class TestM17N < Test::Unit::TestCase assert_equal(Encoding::US_ASCII, eval("# -*- encoding: US-ASCII 
-*-\n__ENCODING__".force_encoding("ASCII-8BIT"))) assert_equal(Encoding::ASCII_8BIT, eval("# -*- encoding: ASCII-8BIT -*-\n__ENCODING__".force_encoding("US-ASCII"))) end + + def test_regexp_usascii + assert_regexp_usascii_literal('//', Encoding::US_ASCII) + assert_regexp_usascii_literal('/#{}/', Encoding::US_ASCII) + assert_regexp_usascii_literal('/#{"a"}/', Encoding::US_ASCII) + assert_regexp_usascii_literal('/#{%q"\x80"}/', Encoding::ASCII_8BIT) + assert_regexp_usascii_literal('/#{"\x80"}/', nil, SyntaxError) + + assert_regexp_usascii_literal('/a/', Encoding::US_ASCII) + assert_regexp_usascii_literal('/a#{}/', Encoding::US_ASCII) + assert_regexp_usascii_literal('/a#{"a"}/', Encoding::US_ASCII) + assert_regexp_usascii_literal('/a#{%q"\x80"}/', Encoding::ASCII_8BIT) + assert_regexp_usascii_literal('/a#{"\x80"}/', nil, SyntaxError) + + assert_regexp_usascii_literal('/\x80/', Encoding::ASCII_8BIT) + assert_regexp_usascii_literal('/\x80#{}/', Encoding::ASCII_8BIT) + assert_regexp_usascii_literal('/\x80#{"a"}/', Encoding::ASCII_8BIT) + assert_regexp_usascii_literal('/\x80#{%q"\x80"}/', Encoding::ASCII_8BIT) + assert_regexp_usascii_literal('/\x80#{"\x80"}/', nil, SyntaxError) + + assert_regexp_usascii_literal('/\u1234/', Encoding::UTF_8) + assert_regexp_usascii_literal('/\u1234#{}/', Encoding::UTF_8) + assert_regexp_usascii_literal('/\u1234#{"a"}/', Encoding::UTF_8) + assert_regexp_usascii_literal('/\u1234#{%q"\x80"}/', nil, SyntaxError) + assert_regexp_usascii_literal('/\u1234#{"\x80"}/', nil, SyntaxError) + assert_regexp_usascii_literal('/\u1234\x80/', nil, SyntaxError) + assert_regexp_usascii_literal('/\u1234#{}\x80/', nil, RegexpError) + end end