diff --git a/ChangeLog b/ChangeLog index d1ae0255df..cecb8e953d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +Mon Jan 7 11:44:45 2008 Tanaka Akira + + * encoding.c (rb_enc_internal_get_index): extracted from + rb_enc_get_index. + (rb_enc_internal_set_index): extracted from rb_enc_associate_index + + * include/ruby/encoding.h (ENCODING_SET): work over ENCODING_INLINE_MAX. + (ENCODING_GET): ditto. + (ENCODING_IS_ASCII8BIT): defined. + (ENCODING_CODERANGE_SET): defined. + + * re.c (rb_reg_fixed_encoding_p): use ENCODING_IS_ASCII8BIT. + + * string.c (rb_enc_str_buf_cat): use ENCODING_IS_ASCII8BIT. + + * parse.y (reg_fragment_setenc_gen): use ENCODING_IS_ASCII8BIT. + + * marshal.c (has_ivars): use ENCODING_IS_ASCII8BIT. + Mon Jan 7 02:14:07 2008 Tanaka Akira * string.c (coderange_scan): avoid rb_enc_to_index. diff --git a/encoding.c b/encoding.c index 2d1813fe91..65609763af 100644 --- a/encoding.c +++ b/encoding.c @@ -436,6 +436,33 @@ rb_id_encoding(void) return id_encoding; } +int +rb_enc_internal_get_index(VALUE obj) +{ + int i; + + i = ENCODING_GET_INLINED(obj); + if (i == ENCODING_INLINE_MAX) { + VALUE iv; + + iv = rb_ivar_get(obj, rb_id_encoding()); + i = NUM2INT(iv); + } + return i; +} + +void +rb_enc_internal_set_index(VALUE obj, int idx) +{ + if (idx < ENCODING_INLINE_MAX) { + ENCODING_SET_INLINED(obj, idx); + return; + } + ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX); + rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx)); + return; +} + void rb_enc_associate_index(VALUE obj, int idx) { @@ -444,13 +471,7 @@ rb_enc_associate_index(VALUE obj, int idx) !rb_enc_asciicompat(rb_enc_from_index(idx))) { ENC_CODERANGE_CLEAR(obj); } - if (idx < ENCODING_INLINE_MAX) { - ENCODING_SET(obj, idx); - return; - } - ENCODING_SET(obj, ENCODING_INLINE_MAX); - rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx)); - return; + rb_enc_internal_set_index(obj, idx); } int @@ -476,17 +497,8 @@ rb_enc_associate(VALUE obj, rb_encoding *enc) int rb_enc_get_index(VALUE obj) { - int i; - 
if (!enc_capable(obj)) return -1; - i = ENCODING_GET(obj); - if (i == ENCODING_INLINE_MAX) { - VALUE iv; - - iv = rb_ivar_get(obj, rb_id_encoding()); - i = NUM2INT(iv); - } - return i; + return rb_enc_internal_get_index(obj); } rb_encoding* diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 0ccded8b0f..3810d86b39 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -22,11 +22,27 @@ #define ENCODING_INLINE_MAX 1023 #define ENCODING_SHIFT (FL_USHIFT+10) #define ENCODING_MASK (ENCODING_INLINE_MAX<<ENCODING_SHIFT) -#define ENCODING_SET(obj,i) do {\ + +#define ENCODING_SET_INLINED(obj,i) do {\ RBASIC(obj)->flags &= ~ENCODING_MASK;\ - RBASIC(obj)->flags |= i << ENCODING_SHIFT;\ + RBASIC(obj)->flags |= (i) << ENCODING_SHIFT;\ } while (0) -#define ENCODING_GET(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT) +#define ENCODING_SET(obj,i) do {\ + VALUE rb_encoding_set_obj = (obj); \ + int encoding_set_enc_index = (i); \ + if (encoding_set_enc_index < ENCODING_INLINE_MAX) \ + ENCODING_SET_INLINED(rb_encoding_set_obj, encoding_set_enc_index); \ + else \ + rb_enc_internal_set_index(rb_encoding_set_obj, encoding_set_enc_index); \ +} while (0) + +#define ENCODING_GET_INLINED(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT) +#define ENCODING_GET(obj) \ + (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ?
\ + ENCODING_GET_INLINED(obj) : \ + rb_enc_internal_get_index(obj)) + +#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0) #define ENC_CODERANGE_MASK (FL_USER8|FL_USER9) #define ENC_CODERANGE_UNKNOWN 0 @@ -39,6 +55,12 @@ (RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr)) #define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0) +#define ENCODING_CODERANGE_SET(obj, encindex, cr) \ + do { \ + VALUE rb_encoding_coderange_obj = (obj); \ + ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \ + ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \ + } while (0) typedef OnigEncodingType rb_encoding; @@ -56,6 +78,8 @@ rb_encoding* rb_enc_check(VALUE,VALUE); void rb_enc_associate_index(VALUE, int); void rb_enc_associate(VALUE, rb_encoding*); void rb_enc_copy(VALUE dst, VALUE src); +int rb_enc_internal_get_index(VALUE obj); +void rb_enc_internal_set_index(VALUE obj, int encindex); VALUE rb_enc_str_new(const char*, long, rb_encoding*); VALUE rb_enc_reg_new(const char*, long, rb_encoding*, int); diff --git a/marshal.c b/marshal.c index 55ef25b31b..29f26d0b2f 100644 --- a/marshal.c +++ b/marshal.c @@ -526,7 +526,7 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) st_data_t num; int hasiv = 0; #define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \ - (!SPECIAL_CONST_P(obj) && ENCODING_GET(obj))) + (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj))) if (limit == 0) { rb_raise(rb_eArgError, "exceed depth limit"); diff --git a/parse.y b/parse.y index bd9f1b1286..75459c354d 100644 --- a/parse.y +++ b/parse.y @@ -8470,7 +8470,7 @@ reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options) if (c) { int opt, idx; rb_char_to_option_kcode(c, &opt, &idx); - if (idx != ENCODING_GET(str) && ENCODING_GET(str) && + if (idx != ENCODING_GET(str) && !ENCODING_IS_ASCII8BIT(str) && rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { compile_error(PARSER_ARG "regexp encoding option '%c' differs from source encoding '%s'", diff 
--git a/re.c b/re.c index fb05e2e892..fc9e830131 100644 --- a/re.c +++ b/re.c @@ -937,7 +937,7 @@ rb_match_busy(VALUE match) static VALUE rb_reg_fixed_encoding_p(VALUE re) { - if (ENCODING_GET(re) != 0 || FL_TEST(re, KCODE_FIXED)) + if (!ENCODING_IS_ASCII8BIT(re) || FL_TEST(re, KCODE_FIXED)) return Qtrue; else return Qfalse; diff --git a/string.c b/string.c index 752a2e5353..524f9bd9ea 100644 --- a/string.c +++ b/string.c @@ -1063,7 +1063,7 @@ rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc) rb_encoding *str_enc = rb_enc_get(str); rb_encoding *res_enc; int str_cr, ptr_cr, res_cr; - int str_a8 = ENCODING_GET(str) == 0; + int str_a8 = ENCODING_IS_ASCII8BIT(str); int ptr_a8 = ptr_enc == rb_ascii8bit_encoding(); str_cr = ENC_CODERANGE(str); diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index b15bcfdf10..32415683b6 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -370,6 +370,15 @@ class TestM17N < Test::Unit::TestCase assert_regexp_fixed_sjis(eval(s(%q{/\xc2\xa1/}))) end + def test_regexp_windows_31j + begin + Regexp.new("\xa1".force_encoding("windows-31j")) =~ "\xa1\xa1".force_encoding("euc-jp") + rescue ArgumentError + err = $! + end + assert_match(/windows-31j/i, err.message) + end + def test_regexp_embed r = eval(e("/\xc2\xa1/")) assert_raise(ArgumentError) { eval(s("/\xc2\xa1\#{r}/s")) }