mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* regparse.c (PINC): use optimized enclen() instead of
ONIGENC_MBC_ENC_LEN(). * regparse.c (PFETCH): ditto. * regparse.c (PFETCH): small optimization. * regexec.c (slow_search): single byte encoding optimization. * regenc.h (enclen): avoid calling function when encoding's min_len == max_len. * re.c (rb_reg_regsub): rb_enc_ascget() optimization for single byte encoding. * re.c (rb_reg_search): avoid allocating new re_registers if we already have MatchData. * re.c (match_init_copy): avoid unnecessary onig_region_free() before onig_region_copy. * encoding.c (rb_enc_get_index): remove implicit enc_capable check each time. * encoding.c (rb_enc_set_index): ditto. * encoding.c (enc_compatible_p): small refactoring. * include/ruby/encoding.h (rb_enc_dummy_p): inline rb_enc_dummy_p() and export related code. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@16477 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
48a42a0387
commit
44cd8e457b
9 changed files with 115 additions and 69 deletions
33
ChangeLog
33
ChangeLog
|
@ -1,3 +1,36 @@
|
|||
Mon May 19 17:23:55 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||
|
||||
* regparse.c (PINC): use optimized enclen() instead of
|
||||
ONIGENC_MBC_ENC_LEN().
|
||||
|
||||
* regparse.c (PFETCH): ditto.
|
||||
|
||||
* regparse.c (PFETCH): small optimization.
|
||||
|
||||
* regexec.c (slow_search): single byte encoding optimization.
|
||||
|
||||
* regenc.h (enclen): avoid calling function when encoding's
|
||||
min_len == max_len.
|
||||
|
||||
* re.c (rb_reg_regsub): rb_enc_ascget() optimization for single
|
||||
byte encoding.
|
||||
|
||||
* re.c (rb_reg_search): avoid allocating new re_registers if we
|
||||
already have MatchData.
|
||||
|
||||
* re.c (match_init_copy): avoid unnecessary onig_region_free()
|
||||
before onig_region_copy.
|
||||
|
||||
* encoding.c (rb_enc_get_index): remove implicit enc_capable check
|
||||
each time.
|
||||
|
||||
* encoding.c (rb_enc_set_index): ditto.
|
||||
|
||||
* encoding.c (enc_compatible_p): small refactoring.
|
||||
|
||||
* include/ruby/encoding.h (rb_enc_dummy_p): inline
|
||||
rb_enc_dummy_p() and export related code.
|
||||
|
||||
Mon May 19 14:32:03 2008 Koichi Sasada <ko1@atdot.net>
|
||||
|
||||
* version.h: fix strange change by version.h update tool.
|
||||
|
|
|
@ -615,7 +615,7 @@ cont.$(OBJEXT): {$(VPATH)}cont.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
|
|||
{$(VPATH)}eval_intern.h {$(VPATH)}util.h {$(VPATH)}dln.h
|
||||
time.$(OBJEXT): {$(VPATH)}time.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
|
||||
{$(VPATH)}defines.h {$(VPATH)}missing.h {$(VPATH)}intern.h \
|
||||
{$(VPATH)}st.h
|
||||
{$(VPATH)}st.h {$(VPATH)}encoding.h
|
||||
util.$(OBJEXT): {$(VPATH)}util.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
|
||||
{$(VPATH)}defines.h {$(VPATH)}missing.h {$(VPATH)}intern.h \
|
||||
{$(VPATH)}st.h {$(VPATH)}util.h
|
||||
|
|
50
encoding.c
50
encoding.c
|
@ -18,7 +18,7 @@
|
|||
#endif
|
||||
|
||||
static ID id_encoding, id_base_encoding;
|
||||
static VALUE rb_cEncoding;
|
||||
VALUE rb_cEncoding;
|
||||
|
||||
struct rb_encoding_entry {
|
||||
const char *name;
|
||||
|
@ -38,14 +38,6 @@ void rb_enc_init(void);
|
|||
|
||||
#define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
|
||||
|
||||
#define ENC_UNINITIALIZED (&rb_cEncoding)
|
||||
#define enc_initialized_p(enc) ((enc)->auxiliary_data != &rb_cEncoding)
|
||||
#define ENC_FROM_ENCODING(enc) ((VALUE)(enc)->auxiliary_data)
|
||||
|
||||
#define ENC_DUMMY_FLAG FL_USER2
|
||||
#define ENC_DUMMY_P(enc) (RBASIC(enc)->flags & ENC_DUMMY_FLAG)
|
||||
#define ENC_SET_DUMMY(enc) (RBASIC(enc)->flags |= ENC_DUMMY_FLAG)
|
||||
|
||||
static int load_encoding(const char *name);
|
||||
static VALUE enc_base_encoding(VALUE self);
|
||||
|
||||
|
@ -318,15 +310,6 @@ rb_encdb_dummy(const char *name)
|
|||
return index;
|
||||
}
|
||||
|
||||
int
|
||||
rb_enc_dummy_p(rb_encoding *enc)
|
||||
{
|
||||
VALUE encoding;
|
||||
if (!enc_initialized_p(enc)) return Qfalse;
|
||||
encoding = rb_enc_from_encoding(enc);
|
||||
return ENC_DUMMY_P(encoding);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* enc.dummy? => true or false
|
||||
|
@ -343,7 +326,7 @@ rb_enc_dummy_p(rb_encoding *enc)
|
|||
static VALUE
|
||||
enc_dummy_p(VALUE enc)
|
||||
{
|
||||
return rb_enc_dummy_p(rb_to_encoding(enc)) ? Qtrue : Qfalse;
|
||||
return ENC_DUMMY_P(enc) ? Qtrue : Qfalse;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -555,7 +538,7 @@ rb_id_encoding(void)
|
|||
}
|
||||
|
||||
int
|
||||
rb_enc_internal_get_index(VALUE obj)
|
||||
rb_enc_get_index(VALUE obj)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -570,7 +553,7 @@ rb_enc_internal_get_index(VALUE obj)
|
|||
}
|
||||
|
||||
void
|
||||
rb_enc_internal_set_index(VALUE obj, int idx)
|
||||
rb_enc_set_index(VALUE obj, int idx)
|
||||
{
|
||||
if (idx < ENCODING_INLINE_MAX) {
|
||||
ENCODING_SET_INLINED(obj, idx);
|
||||
|
@ -584,14 +567,14 @@ rb_enc_internal_set_index(VALUE obj, int idx)
|
|||
void
|
||||
rb_enc_associate_index(VALUE obj, int idx)
|
||||
{
|
||||
enc_check_capable(obj);
|
||||
if (rb_enc_internal_get_index(obj) == idx)
|
||||
// enc_check_capable(obj);
|
||||
if (rb_enc_get_index(obj) == idx)
|
||||
return;
|
||||
if (!ENC_CODERANGE_ASCIIONLY(obj) ||
|
||||
!rb_enc_asciicompat(rb_enc_from_index(idx))) {
|
||||
ENC_CODERANGE_CLEAR(obj);
|
||||
}
|
||||
rb_enc_internal_set_index(obj, idx);
|
||||
rb_enc_set_index(obj, idx);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -600,13 +583,6 @@ rb_enc_associate(VALUE obj, rb_encoding *enc)
|
|||
rb_enc_associate_index(obj, rb_enc_to_index(enc));
|
||||
}
|
||||
|
||||
int
|
||||
rb_enc_get_index(VALUE obj)
|
||||
{
|
||||
if (!enc_capable(obj)) return -1;
|
||||
return rb_enc_internal_get_index(obj);
|
||||
}
|
||||
|
||||
rb_encoding*
|
||||
rb_enc_get(VALUE obj)
|
||||
{
|
||||
|
@ -906,11 +882,13 @@ enc_find(VALUE klass, VALUE enc)
|
|||
static VALUE
|
||||
enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
|
||||
{
|
||||
rb_encoding *enc = rb_enc_compatible(str1, str2);
|
||||
VALUE encoding = Qnil;
|
||||
if (!enc || !(encoding = rb_enc_from_encoding(enc)))
|
||||
encoding = Qnil;
|
||||
return encoding;
|
||||
rb_encoding *enc;
|
||||
|
||||
if (!enc_capable(str1)) return Qnil;
|
||||
if (!enc_capable(str2)) return Qnil;
|
||||
enc = rb_enc_compatible(str1, str2);
|
||||
if (!enc) return Qnil;
|
||||
return rb_enc_from_encoding(enc);
|
||||
}
|
||||
|
||||
/* :nodoc: */
|
||||
|
|
|
@ -33,14 +33,14 @@
|
|||
if (encoding_set_enc_index < ENCODING_INLINE_MAX) \
|
||||
ENCODING_SET_INLINED(rb_encoding_set_obj, encoding_set_enc_index); \
|
||||
else \
|
||||
rb_enc_internal_set_index(rb_encoding_set_obj, encoding_set_enc_index); \
|
||||
rb_enc_set_index(rb_encoding_set_obj, encoding_set_enc_index); \
|
||||
} while (0)
|
||||
|
||||
#define ENCODING_GET_INLINED(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
|
||||
#define ENCODING_GET(obj) \
|
||||
(ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \
|
||||
ENCODING_GET_INLINED(obj) : \
|
||||
rb_enc_internal_get_index(obj))
|
||||
rb_enc_get_index(obj))
|
||||
|
||||
#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
|
||||
|
||||
|
@ -74,9 +74,9 @@ typedef OnigEncodingType rb_encoding;
|
|||
|
||||
int rb_enc_replicate(const char *, rb_encoding *);
|
||||
int rb_define_dummy_encoding(const char *);
|
||||
int rb_enc_dummy_p(rb_encoding *);
|
||||
#define rb_enc_to_index(enc) ((enc) ? ((enc)->ruby_encoding_index) : 0)
|
||||
int rb_enc_get_index(VALUE obj);
|
||||
void rb_enc_set_index(VALUE obj, int encindex);
|
||||
int rb_enc_find_index(const char *name);
|
||||
int rb_to_encoding_index(VALUE);
|
||||
rb_encoding* rb_to_encoding(VALUE);
|
||||
|
@ -86,8 +86,6 @@ rb_encoding* rb_enc_check(VALUE,VALUE);
|
|||
void rb_enc_associate_index(VALUE, int);
|
||||
void rb_enc_associate(VALUE, rb_encoding*);
|
||||
void rb_enc_copy(VALUE dst, VALUE src);
|
||||
int rb_enc_internal_get_index(VALUE obj);
|
||||
void rb_enc_internal_set_index(VALUE obj, int encindex);
|
||||
|
||||
VALUE rb_enc_str_new(const char*, long, rb_encoding*);
|
||||
VALUE rb_enc_reg_new(const char*, long, rb_encoding*, int);
|
||||
|
@ -154,7 +152,7 @@ int rb_enc_codelen(int code, rb_encoding *enc);
|
|||
#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c)
|
||||
#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c)
|
||||
|
||||
#define rb_enc_asciicompat(enc) (!rb_enc_dummy_p(enc) && rb_enc_mbminlen(enc)==1)
|
||||
#define rb_enc_asciicompat(enc) (rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc))
|
||||
|
||||
int rb_enc_casefold(char *to, const char *p, const char *e, rb_encoding *enc);
|
||||
int rb_enc_toupper(int c, rb_encoding *enc);
|
||||
|
@ -178,4 +176,21 @@ void rb_enc_set_default_external(VALUE encoding);
|
|||
VALUE rb_locale_charmap(VALUE klass);
|
||||
long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
|
||||
|
||||
RUBY_EXTERN VALUE rb_cEncoding;
|
||||
|
||||
#define ENC_UNINITIALIZED (&rb_cEncoding)
|
||||
#define enc_initialized_p(enc) ((enc)->auxiliary_data != &rb_cEncoding)
|
||||
#define ENC_FROM_ENCODING(enc) ((VALUE)(enc)->auxiliary_data)
|
||||
|
||||
#define ENC_DUMMY_FLAG FL_USER2
|
||||
#define ENC_DUMMY_P(enc) (RBASIC(enc)->flags & ENC_DUMMY_FLAG)
|
||||
#define ENC_SET_DUMMY(enc) (RBASIC(enc)->flags |= ENC_DUMMY_FLAG)
|
||||
|
||||
static inline int
|
||||
rb_enc_dummy_p(rb_encoding *enc)
|
||||
{
|
||||
if (!enc_initialized_p(enc)) return Qfalse;
|
||||
return ENC_DUMMY_P(ENC_FROM_ENCODING(enc));
|
||||
}
|
||||
|
||||
#endif /* RUBY_ENCODING_H */
|
||||
|
|
41
re.c
41
re.c
|
@ -881,9 +881,6 @@ match_init_copy(VALUE obj, VALUE orig)
|
|||
RMATCH(obj)->regexp = RMATCH(orig)->regexp;
|
||||
|
||||
rm = RMATCH(obj)->rmatch;
|
||||
onig_region_free(&rm->regs, 0);
|
||||
rm->regs.allocated = 0;
|
||||
|
||||
onig_region_copy(&rm->regs, RMATCH_REGS(orig));
|
||||
|
||||
if (!RMATCH(orig)->rmatch->char_offset_updated) {
|
||||
|
@ -1265,7 +1262,7 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
|
|||
{
|
||||
int result;
|
||||
VALUE match;
|
||||
struct re_registers regs;
|
||||
struct re_registers *regs, regi;
|
||||
char *range = RSTRING_PTR(str);
|
||||
regex_t *reg0 = RREGEXP(re)->ptr, *reg;
|
||||
int busy = FL_TEST(re, REG_BUSY);
|
||||
|
@ -1277,17 +1274,29 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
|
|||
|
||||
reg = rb_reg_prepare_re(re, str);
|
||||
|
||||
match = rb_backref_get();
|
||||
if (!NIL_P(match)) {
|
||||
if (FL_TEST(match, MATCH_BUSY)) {
|
||||
match = Qnil;
|
||||
}
|
||||
else {
|
||||
regs = RMATCH_REGS(match);
|
||||
}
|
||||
}
|
||||
if (NIL_P(match)) {
|
||||
regs = &regi;
|
||||
MEMZERO(regs, struct re_registers, 1);
|
||||
}
|
||||
FL_SET(re, REG_BUSY);
|
||||
if (!reverse) {
|
||||
range += RSTRING_LEN(str);
|
||||
}
|
||||
MEMZERO(&regs, struct re_registers, 1);
|
||||
result = onig_search(reg,
|
||||
(UChar*)(RSTRING_PTR(str)),
|
||||
((UChar*)(RSTRING_PTR(str)) + RSTRING_LEN(str)),
|
||||
((UChar*)(RSTRING_PTR(str)) + pos),
|
||||
((UChar*)range),
|
||||
&regs, ONIG_OPTION_NONE);
|
||||
regs, ONIG_OPTION_NONE);
|
||||
|
||||
if (RREGEXP(re)->ptr != reg) {
|
||||
if (busy) {
|
||||
|
@ -1300,7 +1309,8 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
|
|||
}
|
||||
if (!busy) FL_UNSET(re, REG_BUSY);
|
||||
if (result < 0) {
|
||||
onig_region_free(&regs, 0);
|
||||
if (regs == &regi)
|
||||
onig_region_free(regs, 0);
|
||||
if (result == ONIG_MISMATCH) {
|
||||
rb_backref_set(Qnil);
|
||||
return result;
|
||||
|
@ -1312,9 +1322,10 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
|
|||
}
|
||||
}
|
||||
|
||||
match = rb_backref_get();
|
||||
if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
|
||||
if (NIL_P(match)) {
|
||||
match = match_alloc(rb_cMatch);
|
||||
onig_region_copy(RMATCH_REGS(match), regs);
|
||||
onig_region_free(regs, 0);
|
||||
}
|
||||
else {
|
||||
if (rb_safe_level() >= 3)
|
||||
|
@ -1323,8 +1334,6 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
|
|||
FL_UNSET(match, FL_TAINT);
|
||||
}
|
||||
|
||||
onig_region_copy(RMATCH_REGS(match), &regs);
|
||||
onig_region_free(&regs, 0);
|
||||
RMATCH(match)->str = rb_str_new4(str);
|
||||
RMATCH(match)->regexp = re;
|
||||
RMATCH(match)->rmatch->char_offset_updated = 0;
|
||||
|
@ -3088,12 +3097,14 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
|
|||
int no, clen;
|
||||
rb_encoding *str_enc = rb_enc_get(str);
|
||||
rb_encoding *src_enc = rb_enc_get(src);
|
||||
int acompat = rb_enc_asciicompat(str_enc);
|
||||
#define ASCGET(s,e,cl) (acompat ? (*cl=1,s[0]) : rb_enc_ascget(s, e, cl, str_enc))
|
||||
|
||||
p = s = RSTRING_PTR(str);
|
||||
e = s + RSTRING_LEN(str);
|
||||
|
||||
while (s < e) {
|
||||
int c = rb_enc_ascget(s, e, &clen, str_enc);
|
||||
int c = ASCGET(s, e, &clen);
|
||||
char *ss;
|
||||
|
||||
if (c == -1) {
|
||||
|
@ -3110,7 +3121,7 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
|
|||
}
|
||||
rb_enc_str_buf_cat(val, p, ss-p, str_enc);
|
||||
|
||||
c = rb_enc_ascget(s, e, &clen, str_enc);
|
||||
c = ASCGET(s, e, &clen);
|
||||
if (c == -1) {
|
||||
s += mbclen(s, e, str_enc);
|
||||
rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
|
||||
|
@ -3132,12 +3143,12 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
|
|||
break;
|
||||
|
||||
case 'k':
|
||||
if (s < e && rb_enc_ascget(s, e, &clen, str_enc) == '<') {
|
||||
if (s < e && ASCGET(s, e, &clen) == '<') {
|
||||
char *name, *name_end;
|
||||
|
||||
name_end = name = s + clen;
|
||||
while (name_end < e) {
|
||||
c = rb_enc_ascget(name_end, e, &clen, str_enc);
|
||||
c = ASCGET(name_end, e, &clen);
|
||||
if (c == '>') break;
|
||||
name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
|
||||
}
|
||||
|
|
2
regenc.h
2
regenc.h
|
@ -70,7 +70,7 @@ typedef struct {
|
|||
#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
|
||||
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
|
||||
|
||||
#define enclen(enc,p,e) ONIGENC_MBC_ENC_LEN(enc,p,e)
|
||||
#define enclen(enc,p,e) ((enc->max_enc_len == enc->min_enc_len) ? enc->min_enc_len : ONIGENC_MBC_ENC_LEN(enc,p,e))
|
||||
|
||||
/* character types bit flag */
|
||||
#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)
|
||||
|
|
21
regexec.c
21
regexec.c
|
@ -2758,16 +2758,25 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
|
|||
|
||||
s = (UChar* )text;
|
||||
|
||||
if (enc->max_enc_len == enc->min_enc_len) {
|
||||
int n = enc->max_enc_len;
|
||||
|
||||
while (s < end) {
|
||||
if (*s == *target) {
|
||||
p = s + 1;
|
||||
t = target + 1;
|
||||
if (memcmp(t, p, target_end - t) == 0)
|
||||
return s;
|
||||
}
|
||||
s += n;
|
||||
}
|
||||
return (UChar*)NULL;
|
||||
}
|
||||
while (s < end) {
|
||||
if (*s == *target) {
|
||||
p = s + 1;
|
||||
t = target + 1;
|
||||
while (t < target_end) {
|
||||
if (*t != *p++)
|
||||
break;
|
||||
t++;
|
||||
}
|
||||
if (t == target_end)
|
||||
if (memcmp(t, p, target_end - t) == 0)
|
||||
return s;
|
||||
}
|
||||
s += enclen(enc, s, end);
|
||||
|
|
|
@ -253,12 +253,12 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
|
|||
#define PUNFETCH p = pfetch_prev
|
||||
#define PINC do { \
|
||||
pfetch_prev = p; \
|
||||
p += ONIGENC_MBC_ENC_LEN(enc, p, end); \
|
||||
p += enclen(enc, p, end); \
|
||||
} while (0)
|
||||
#define PFETCH(c) do { \
|
||||
c = ONIGENC_MBC_TO_CODE(enc, p, end); \
|
||||
c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
|
||||
pfetch_prev = p; \
|
||||
p += ONIGENC_MBC_ENC_LEN(enc, p, end); \
|
||||
p += enclen(enc, p, end); \
|
||||
} while (0)
|
||||
|
||||
#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
|
||||
|
|
2
string.c
2
string.c
|
@ -256,7 +256,7 @@ rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc
|
|||
static inline void
|
||||
str_enc_copy(VALUE str1, VALUE str2)
|
||||
{
|
||||
rb_enc_internal_set_index(str1, ENCODING_GET(str2));
|
||||
rb_enc_set_index(str1, ENCODING_GET(str2));
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
Loading…
Add table
Reference in a new issue