mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* include/ruby/encoding.h, encoding.c, re.c, string.c, parse.y:
rename ENC_CODERANGE_SINGLE to ENC_CODERANGE_7BIT. rename ENC_CODERANGE_MULTI to ENC_CODERANGE_8BIT. This is because a single-byte 8-bit character, such as a Shift_JIS 1-byte katakana, is represented by ENC_CODERANGE_MULTI even though it is not multibyte. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14027 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
9c7aab17a5
commit
f5ee0fd521
6 changed files with 30 additions and 22 deletions
|
@ -1,3 +1,11 @@
|
||||||
|
Tue Nov 27 11:14:57 2007 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
|
* include/ruby/encoding.h, encoding.c, re.c, string.c, parse.y:
|
||||||
|
rename ENC_CODERANGE_SINGLE to ENC_CODERANGE_7BIT.
|
||||||
|
rename ENC_CODERANGE_MULTI to ENC_CODERANGE_8BIT.
|
||||||
|
Because a single-byte 8-bit character, such as a Shift_JIS 1-byte katakana,
|
||||||
|
is represented by ENC_CODERANGE_MULTI even though it is not multibyte.
|
||||||
|
|
||||||
Tue Nov 27 10:45:45 2007 Koichi Sasada <ko1@atdot.net>
|
Tue Nov 27 10:45:45 2007 Koichi Sasada <ko1@atdot.net>
|
||||||
|
|
||||||
* eval.c (rb_method_missing): fix stack trace.
|
* eval.c (rb_method_missing): fix stack trace.
|
||||||
|
|
|
@ -407,12 +407,12 @@ rb_enc_compatible(VALUE str1, VALUE str2)
|
||||||
cr2 = rb_enc_str_coderange(str2);
|
cr2 = rb_enc_str_coderange(str2);
|
||||||
if (cr1 != cr2) {
|
if (cr1 != cr2) {
|
||||||
/* may need to handle ENC_CODERANGE_BROKEN */
|
/* may need to handle ENC_CODERANGE_BROKEN */
|
||||||
if (cr1 == ENC_CODERANGE_SINGLE) return rb_enc_from_index(idx2);
|
if (cr1 == ENC_CODERANGE_7BIT) return rb_enc_from_index(idx2);
|
||||||
if (cr2 == ENC_CODERANGE_SINGLE) return rb_enc_from_index(idx1);
|
if (cr2 == ENC_CODERANGE_7BIT) return rb_enc_from_index(idx1);
|
||||||
}
|
}
|
||||||
if (cr1 == ENC_CODERANGE_SINGLE) return rb_enc_from_index(0);
|
if (cr1 == ENC_CODERANGE_7BIT) return rb_enc_from_index(0);
|
||||||
}
|
}
|
||||||
if (cr1 == ENC_CODERANGE_SINGLE &&
|
if (cr1 == ENC_CODERANGE_7BIT &&
|
||||||
rb_enc_asciicompat(enc = rb_enc_from_index(idx2)))
|
rb_enc_asciicompat(enc = rb_enc_from_index(idx2)))
|
||||||
return enc;
|
return enc;
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,11 +26,11 @@
|
||||||
|
|
||||||
#define ENC_CODERANGE_MASK (FL_USER12|FL_USER13)
|
#define ENC_CODERANGE_MASK (FL_USER12|FL_USER13)
|
||||||
#define ENC_CODERANGE_UNKNOWN 0
|
#define ENC_CODERANGE_UNKNOWN 0
|
||||||
#define ENC_CODERANGE_SINGLE FL_USER12
|
#define ENC_CODERANGE_7BIT FL_USER12
|
||||||
#define ENC_CODERANGE_MULTI FL_USER13
|
#define ENC_CODERANGE_8BIT FL_USER13
|
||||||
#define ENC_CODERANGE_BROKEN (FL_USER12|FL_USER13)
|
#define ENC_CODERANGE_BROKEN (FL_USER12|FL_USER13)
|
||||||
#define ENC_CODERANGE(obj) (RBASIC(obj)->flags & ENC_CODERANGE_MASK)
|
#define ENC_CODERANGE(obj) (RBASIC(obj)->flags & ENC_CODERANGE_MASK)
|
||||||
#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_SINGLE)
|
#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_7BIT)
|
||||||
#define ENC_CODERANGE_SET(obj,cr) (RBASIC(obj)->flags = \
|
#define ENC_CODERANGE_SET(obj,cr) (RBASIC(obj)->flags = \
|
||||||
(RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr))
|
(RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr))
|
||||||
#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0)
|
#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0)
|
||||||
|
|
18
parse.y
18
parse.y
|
@ -271,7 +271,7 @@ struct parser_params {
|
||||||
#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
|
#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
|
||||||
#define STR_NEW3(p,n,e,has8,hasmb) parser_str_new2((p),(n),(e),(has8),(hasmb))
|
#define STR_NEW3(p,n,e,has8,hasmb) parser_str_new2((p),(n),(e),(has8),(hasmb))
|
||||||
#define STR_ENC(m) ((m)?parser->enc:rb_enc_from_index(0))
|
#define STR_ENC(m) ((m)?parser->enc:rb_enc_from_index(0))
|
||||||
#define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_SINGLE)
|
#define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT)
|
||||||
#define TOK_INTERN(mb) rb_intern3(tok(), toklen(), STR_ENC(mb))
|
#define TOK_INTERN(mb) rb_intern3(tok(), toklen(), STR_ENC(mb))
|
||||||
|
|
||||||
#ifdef YYMALLOC
|
#ifdef YYMALLOC
|
||||||
|
@ -4835,8 +4835,8 @@ parser_str_new2(const char *p, long n, rb_encoding *enc, int has8bit,int hasmb)
|
||||||
* Set coderange bit flags based on the presence of 8-bit and
|
* Set coderange bit flags based on the presence of 8-bit and
|
||||||
* multi-byte characters in the string
|
* multi-byte characters in the string
|
||||||
*/
|
*/
|
||||||
int coderange = ENC_CODERANGE_SINGLE;
|
int coderange = ENC_CODERANGE_7BIT;
|
||||||
if (hasmb) coderange = ENC_CODERANGE_MULTI;
|
if (hasmb) coderange = ENC_CODERANGE_8BIT;
|
||||||
else if (has8bit) coderange = ENC_CODERANGE_UNKNOWN;
|
else if (has8bit) coderange = ENC_CODERANGE_UNKNOWN;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -4845,7 +4845,7 @@ parser_str_new2(const char *p, long n, rb_encoding *enc, int has8bit,int hasmb)
|
||||||
* string is in the ASCII subset, and we just use the ASCII encoding
|
* string is in the ASCII subset, and we just use the ASCII encoding
|
||||||
* instead.
|
* instead.
|
||||||
*/
|
*/
|
||||||
if ((coderange == ENC_CODERANGE_SINGLE) && rb_enc_asciicompat(enc))
|
if ((coderange == ENC_CODERANGE_7BIT) && rb_enc_asciicompat(enc))
|
||||||
enc = rb_enc_default();
|
enc = rb_enc_default();
|
||||||
|
|
||||||
return parser_str_new(p, n, enc, coderange);
|
return parser_str_new(p, n, enc, coderange);
|
||||||
|
@ -5676,7 +5676,7 @@ parser_here_document(struct parser_params *parser, NODE *here)
|
||||||
} while (!whole_match_p(eos, len, indent));
|
} while (!whole_match_p(eos, len, indent));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* int mb = ENC_CODERANGE_SINGLE, *mbp = &mb;*/
|
/* int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/
|
||||||
int has8bit=0, hasmb=0;
|
int has8bit=0, hasmb=0;
|
||||||
rb_encoding *enc = parser->enc;
|
rb_encoding *enc = parser->enc;
|
||||||
newtok();
|
newtok();
|
||||||
|
@ -7100,7 +7100,7 @@ parser_yylex(struct parser_params *parser)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
mb = ENC_CODERANGE_SINGLE;
|
mb = ENC_CODERANGE_7BIT;
|
||||||
do {
|
do {
|
||||||
if (!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN;
|
if (!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN;
|
||||||
tokadd_mbchar(c);
|
tokadd_mbchar(c);
|
||||||
|
@ -7155,7 +7155,7 @@ parser_yylex(struct parser_params *parser)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mb == ENC_CODERANGE_SINGLE && lex_state != EXPR_DOT) {
|
if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
|
||||||
const struct kwtable *kw;
|
const struct kwtable *kw;
|
||||||
|
|
||||||
/* See if it is a reserved word. */
|
/* See if it is a reserved word. */
|
||||||
|
@ -8466,7 +8466,7 @@ reg_compile_gen(struct parser_params* parser, VALUE str, int options)
|
||||||
int opt, idx;
|
int opt, idx;
|
||||||
rb_char_to_option_kcode(c, &opt, &idx);
|
rb_char_to_option_kcode(c, &opt, &idx);
|
||||||
if (idx != ENCODING_GET(str) && ENCODING_GET(str) &&
|
if (idx != ENCODING_GET(str) && ENCODING_GET(str) &&
|
||||||
rb_enc_str_coderange(str) != ENC_CODERANGE_SINGLE) {
|
rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
|
||||||
compile_error(PARSER_ARG
|
compile_error(PARSER_ARG
|
||||||
"regexp encoding option '%c' differs from source encoding '%s'",
|
"regexp encoding option '%c' differs from source encoding '%s'",
|
||||||
c, rb_enc_name(rb_enc_get(str)));
|
c, rb_enc_name(rb_enc_get(str)));
|
||||||
|
@ -8919,7 +8919,7 @@ rb_intern_str(VALUE str)
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
ID id;
|
ID id;
|
||||||
|
|
||||||
if (rb_enc_str_coderange(str) != ENC_CODERANGE_SINGLE) {
|
if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
|
||||||
idx = rb_enc_get_index(str);
|
idx = rb_enc_get_index(str);
|
||||||
}
|
}
|
||||||
id = rb_intern3(RSTRING_PTR(str), RSTRING_LEN(str),
|
id = rb_intern3(RSTRING_PTR(str), RSTRING_LEN(str),
|
||||||
|
|
2
re.c
2
re.c
|
@ -725,7 +725,7 @@ rb_reg_prepare_re(VALUE re, VALUE str)
|
||||||
/* ignorecase status */
|
/* ignorecase status */
|
||||||
if (rb_reg_fixed_encoding_p(re)) {
|
if (rb_reg_fixed_encoding_p(re)) {
|
||||||
if (ENCODING_GET(re) != rb_enc_get_index(str) &&
|
if (ENCODING_GET(re) != rb_enc_get_index(str) &&
|
||||||
rb_enc_str_coderange(str) != ENC_CODERANGE_SINGLE) {
|
rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
|
||||||
rb_raise(rb_eArgError, "character encodings differ");
|
rb_raise(rb_eArgError, "character encodings differ");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
10
string.c
10
string.c
|
@ -92,7 +92,7 @@ VALUE rb_cSymbol;
|
||||||
}\
|
}\
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_SINGLE)
|
#define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
|
||||||
|
|
||||||
VALUE rb_fs;
|
VALUE rb_fs;
|
||||||
|
|
||||||
|
@ -105,7 +105,7 @@ rb_enc_str_coderange(VALUE str)
|
||||||
rb_encoding *enc = rb_enc_get(str);
|
rb_encoding *enc = rb_enc_get(str);
|
||||||
|
|
||||||
if (!rb_enc_asciicompat(enc)) {
|
if (!rb_enc_asciicompat(enc)) {
|
||||||
cr = ENC_CODERANGE_MULTI;
|
cr = ENC_CODERANGE_8BIT;
|
||||||
ENC_CODERANGE_SET(str, cr);
|
ENC_CODERANGE_SET(str, cr);
|
||||||
return cr;
|
return cr;
|
||||||
}
|
}
|
||||||
|
@ -113,12 +113,12 @@ rb_enc_str_coderange(VALUE str)
|
||||||
const char *p = RSTRING_PTR(str);
|
const char *p = RSTRING_PTR(str);
|
||||||
const char *e = p + RSTRING_LEN(str);
|
const char *e = p + RSTRING_LEN(str);
|
||||||
|
|
||||||
cr = ENC_CODERANGE_SINGLE;
|
cr = ENC_CODERANGE_7BIT;
|
||||||
while (p < e) {
|
while (p < e) {
|
||||||
int c = (unsigned char)*p;
|
int c = (unsigned char)*p;
|
||||||
|
|
||||||
if (!isascii(c)) {
|
if (!isascii(c)) {
|
||||||
cr = ENC_CODERANGE_MULTI;
|
cr = ENC_CODERANGE_8BIT;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
p++;
|
p++;
|
||||||
|
@ -134,7 +134,7 @@ int rb_enc_str_asciionly_p(VALUE str)
|
||||||
rb_encoding *enc = rb_enc_get(str);
|
rb_encoding *enc = rb_enc_get(str);
|
||||||
|
|
||||||
if (rb_enc_asciicompat(enc) &&
|
if (rb_enc_asciicompat(enc) &&
|
||||||
rb_enc_str_coderange(str) == ENC_CODERANGE_SINGLE) {
|
rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
|
||||||
char *ptr = RSTRING_PTR(str);
|
char *ptr = RSTRING_PTR(str);
|
||||||
long len = RSTRING_LEN(str);
|
long len = RSTRING_LEN(str);
|
||||||
long i;
|
long i;
|
||||||
|
|
Loading…
Add table
Reference in a new issue