1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* regparse.c (PINC): use optimized enclen() instead of
  ONIGENC_MBC_ENC_LEN().

* regparse.c (PFETCH): ditto.

* regparse.c (PFETCH): small optimization.

* regexec.c (slow_search): single byte encoding optimization.

* regenc.h (enclen): avoid calling function when encoding's
  min_len == max_len.

* re.c (rb_reg_regsub): rb_enc_ascget() optimization for single
  byte encoding.

* re.c (rb_reg_search): avoid allocating new re_registers if we
  already have MatchData.

* re.c (match_init_copy): avoid unnecessary onig_region_free()
  before onig_region_copy. 

* encoding.c (rb_enc_get_index): remove implicit enc_capable check
  each time.

* encoding.c (rb_enc_set_index): ditto.

* encoding.c (enc_compatible_p): small refactoring.

* include/ruby/encoding.h (rb_enc_dummy_p): inline
  rb_enc_dummy_p() and export related code.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@16477 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2008-05-19 08:25:03 +00:00
parent 48a42a0387
commit 44cd8e457b
9 changed files with 115 additions and 69 deletions

View file

@ -1,3 +1,36 @@
Mon May 19 17:23:55 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
* regparse.c (PINC): use optimized enclen() instead of
ONIGENC_MBC_ENC_LEN().
* regparse.c (PFETCH): ditto.
* regparse.c (PFETCH): small optimization.
* regexec.c (slow_search): single byte encoding optimization.
* regenc.h (enclen): avoid calling function when encoding's
min_len == max_len.
* re.c (rb_reg_regsub): rb_enc_ascget() optimization for single
byte encoding.
* re.c (rb_reg_search): avoid allocating new re_registers if we
already have MatchData.
* re.c (match_init_copy): avoid unnecessary onig_region_free()
before onig_region_copy.
* encoding.c (rb_enc_get_index): remove implicit enc_capable check
each time.
* encoding.c (rb_enc_set_index): ditto.
* encoding.c (enc_compatible_p): small refactoring.
* include/ruby/encoding.h (rb_enc_dummy_p): inline
rb_enc_dummy_p() and export related code.
Mon May 19 14:32:03 2008 Koichi Sasada <ko1@atdot.net>
* version.h: fix strange change by version.h update tool.

View file

@ -615,7 +615,7 @@ cont.$(OBJEXT): {$(VPATH)}cont.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}eval_intern.h {$(VPATH)}util.h {$(VPATH)}dln.h
time.$(OBJEXT): {$(VPATH)}time.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}missing.h {$(VPATH)}intern.h \
{$(VPATH)}st.h
{$(VPATH)}st.h {$(VPATH)}encoding.h
util.$(OBJEXT): {$(VPATH)}util.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}missing.h {$(VPATH)}intern.h \
{$(VPATH)}st.h {$(VPATH)}util.h

View file

@ -18,7 +18,7 @@
#endif
static ID id_encoding, id_base_encoding;
static VALUE rb_cEncoding;
VALUE rb_cEncoding;
struct rb_encoding_entry {
const char *name;
@ -38,14 +38,6 @@ void rb_enc_init(void);
#define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
#define ENC_UNINITIALIZED (&rb_cEncoding)
#define enc_initialized_p(enc) ((enc)->auxiliary_data != &rb_cEncoding)
#define ENC_FROM_ENCODING(enc) ((VALUE)(enc)->auxiliary_data)
#define ENC_DUMMY_FLAG FL_USER2
#define ENC_DUMMY_P(enc) (RBASIC(enc)->flags & ENC_DUMMY_FLAG)
#define ENC_SET_DUMMY(enc) (RBASIC(enc)->flags |= ENC_DUMMY_FLAG)
static int load_encoding(const char *name);
static VALUE enc_base_encoding(VALUE self);
@ -318,15 +310,6 @@ rb_encdb_dummy(const char *name)
return index;
}
int
rb_enc_dummy_p(rb_encoding *enc)
{
VALUE encoding;
if (!enc_initialized_p(enc)) return Qfalse;
encoding = rb_enc_from_encoding(enc);
return ENC_DUMMY_P(encoding);
}
/*
* call-seq:
* enc.dummy? => true or false
@ -343,7 +326,7 @@ rb_enc_dummy_p(rb_encoding *enc)
static VALUE
enc_dummy_p(VALUE enc)
{
return rb_enc_dummy_p(rb_to_encoding(enc)) ? Qtrue : Qfalse;
return ENC_DUMMY_P(enc) ? Qtrue : Qfalse;
}
static int
@ -555,7 +538,7 @@ rb_id_encoding(void)
}
int
rb_enc_internal_get_index(VALUE obj)
rb_enc_get_index(VALUE obj)
{
int i;
@ -570,7 +553,7 @@ rb_enc_internal_get_index(VALUE obj)
}
void
rb_enc_internal_set_index(VALUE obj, int idx)
rb_enc_set_index(VALUE obj, int idx)
{
if (idx < ENCODING_INLINE_MAX) {
ENCODING_SET_INLINED(obj, idx);
@ -584,14 +567,14 @@ rb_enc_internal_set_index(VALUE obj, int idx)
void
rb_enc_associate_index(VALUE obj, int idx)
{
enc_check_capable(obj);
if (rb_enc_internal_get_index(obj) == idx)
// enc_check_capable(obj);
if (rb_enc_get_index(obj) == idx)
return;
if (!ENC_CODERANGE_ASCIIONLY(obj) ||
!rb_enc_asciicompat(rb_enc_from_index(idx))) {
ENC_CODERANGE_CLEAR(obj);
}
rb_enc_internal_set_index(obj, idx);
rb_enc_set_index(obj, idx);
}
void
@ -600,13 +583,6 @@ rb_enc_associate(VALUE obj, rb_encoding *enc)
rb_enc_associate_index(obj, rb_enc_to_index(enc));
}
int
rb_enc_get_index(VALUE obj)
{
if (!enc_capable(obj)) return -1;
return rb_enc_internal_get_index(obj);
}
rb_encoding*
rb_enc_get(VALUE obj)
{
@ -906,11 +882,13 @@ enc_find(VALUE klass, VALUE enc)
static VALUE
enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
{
rb_encoding *enc = rb_enc_compatible(str1, str2);
VALUE encoding = Qnil;
if (!enc || !(encoding = rb_enc_from_encoding(enc)))
encoding = Qnil;
return encoding;
rb_encoding *enc;
if (!enc_capable(str1)) return Qnil;
if (!enc_capable(str2)) return Qnil;
enc = rb_enc_compatible(str1, str2);
if (!enc) return Qnil;
return rb_enc_from_encoding(enc);
}
/* :nodoc: */

View file

@ -33,14 +33,14 @@
if (encoding_set_enc_index < ENCODING_INLINE_MAX) \
ENCODING_SET_INLINED(rb_encoding_set_obj, encoding_set_enc_index); \
else \
rb_enc_internal_set_index(rb_encoding_set_obj, encoding_set_enc_index); \
rb_enc_set_index(rb_encoding_set_obj, encoding_set_enc_index); \
} while (0)
#define ENCODING_GET_INLINED(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
#define ENCODING_GET(obj) \
(ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \
ENCODING_GET_INLINED(obj) : \
rb_enc_internal_get_index(obj))
rb_enc_get_index(obj))
#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
@ -74,9 +74,9 @@ typedef OnigEncodingType rb_encoding;
int rb_enc_replicate(const char *, rb_encoding *);
int rb_define_dummy_encoding(const char *);
int rb_enc_dummy_p(rb_encoding *);
#define rb_enc_to_index(enc) ((enc) ? ((enc)->ruby_encoding_index) : 0)
int rb_enc_get_index(VALUE obj);
void rb_enc_set_index(VALUE obj, int encindex);
int rb_enc_find_index(const char *name);
int rb_to_encoding_index(VALUE);
rb_encoding* rb_to_encoding(VALUE);
@ -86,8 +86,6 @@ rb_encoding* rb_enc_check(VALUE,VALUE);
void rb_enc_associate_index(VALUE, int);
void rb_enc_associate(VALUE, rb_encoding*);
void rb_enc_copy(VALUE dst, VALUE src);
int rb_enc_internal_get_index(VALUE obj);
void rb_enc_internal_set_index(VALUE obj, int encindex);
VALUE rb_enc_str_new(const char*, long, rb_encoding*);
VALUE rb_enc_reg_new(const char*, long, rb_encoding*, int);
@ -154,7 +152,7 @@ int rb_enc_codelen(int code, rb_encoding *enc);
#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c)
#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c)
#define rb_enc_asciicompat(enc) (!rb_enc_dummy_p(enc) && rb_enc_mbminlen(enc)==1)
#define rb_enc_asciicompat(enc) (rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc))
int rb_enc_casefold(char *to, const char *p, const char *e, rb_encoding *enc);
int rb_enc_toupper(int c, rb_encoding *enc);
@ -178,4 +176,21 @@ void rb_enc_set_default_external(VALUE encoding);
VALUE rb_locale_charmap(VALUE klass);
long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
RUBY_EXTERN VALUE rb_cEncoding;
#define ENC_UNINITIALIZED (&rb_cEncoding)
#define enc_initialized_p(enc) ((enc)->auxiliary_data != &rb_cEncoding)
#define ENC_FROM_ENCODING(enc) ((VALUE)(enc)->auxiliary_data)
#define ENC_DUMMY_FLAG FL_USER2
#define ENC_DUMMY_P(enc) (RBASIC(enc)->flags & ENC_DUMMY_FLAG)
#define ENC_SET_DUMMY(enc) (RBASIC(enc)->flags |= ENC_DUMMY_FLAG)
/*
 * Reports whether the encoding `enc` is flagged as a dummy encoding.
 *
 * If enc->auxiliary_data still equals &rb_cEncoding (the ENC_UNINITIALIZED
 * marker), Qfalse is returned.  Otherwise the object stored in
 * auxiliary_data is tested for ENC_DUMMY_FLAG and the masked flag bits are
 * returned as-is, so callers should treat the result as zero / non-zero
 * rather than compare it against 1.
 *
 * NOTE(review): the declared return type is int, yet the early exit returns
 * the VALUE constant Qfalse -- presumably equal to 0; confirm.
 */
static inline int
rb_enc_dummy_p(rb_encoding *enc)
{
if (!enc_initialized_p(enc)) return Qfalse;
return ENC_DUMMY_P(ENC_FROM_ENCODING(enc));
}
#endif /* RUBY_ENCODING_H */

41
re.c
View file

@ -881,9 +881,6 @@ match_init_copy(VALUE obj, VALUE orig)
RMATCH(obj)->regexp = RMATCH(orig)->regexp;
rm = RMATCH(obj)->rmatch;
onig_region_free(&rm->regs, 0);
rm->regs.allocated = 0;
onig_region_copy(&rm->regs, RMATCH_REGS(orig));
if (!RMATCH(orig)->rmatch->char_offset_updated) {
@ -1265,7 +1262,7 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
{
int result;
VALUE match;
struct re_registers regs;
struct re_registers *regs, regi;
char *range = RSTRING_PTR(str);
regex_t *reg0 = RREGEXP(re)->ptr, *reg;
int busy = FL_TEST(re, REG_BUSY);
@ -1277,17 +1274,29 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
reg = rb_reg_prepare_re(re, str);
match = rb_backref_get();
if (!NIL_P(match)) {
if (FL_TEST(match, MATCH_BUSY)) {
match = Qnil;
}
else {
regs = RMATCH_REGS(match);
}
}
if (NIL_P(match)) {
regs = &regi;
MEMZERO(regs, struct re_registers, 1);
}
FL_SET(re, REG_BUSY);
if (!reverse) {
range += RSTRING_LEN(str);
}
MEMZERO(&regs, struct re_registers, 1);
result = onig_search(reg,
(UChar*)(RSTRING_PTR(str)),
((UChar*)(RSTRING_PTR(str)) + RSTRING_LEN(str)),
((UChar*)(RSTRING_PTR(str)) + pos),
((UChar*)range),
&regs, ONIG_OPTION_NONE);
regs, ONIG_OPTION_NONE);
if (RREGEXP(re)->ptr != reg) {
if (busy) {
@ -1300,7 +1309,8 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
}
if (!busy) FL_UNSET(re, REG_BUSY);
if (result < 0) {
onig_region_free(&regs, 0);
if (regs == &regi)
onig_region_free(regs, 0);
if (result == ONIG_MISMATCH) {
rb_backref_set(Qnil);
return result;
@ -1312,9 +1322,10 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
}
}
match = rb_backref_get();
if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
if (NIL_P(match)) {
match = match_alloc(rb_cMatch);
onig_region_copy(RMATCH_REGS(match), regs);
onig_region_free(regs, 0);
}
else {
if (rb_safe_level() >= 3)
@ -1323,8 +1334,6 @@ rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
FL_UNSET(match, FL_TAINT);
}
onig_region_copy(RMATCH_REGS(match), &regs);
onig_region_free(&regs, 0);
RMATCH(match)->str = rb_str_new4(str);
RMATCH(match)->regexp = re;
RMATCH(match)->rmatch->char_offset_updated = 0;
@ -3088,12 +3097,14 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
int no, clen;
rb_encoding *str_enc = rb_enc_get(str);
rb_encoding *src_enc = rb_enc_get(src);
int acompat = rb_enc_asciicompat(str_enc);
#define ASCGET(s,e,cl) (acompat ? (*cl=1,s[0]) : rb_enc_ascget(s, e, cl, str_enc))
p = s = RSTRING_PTR(str);
e = s + RSTRING_LEN(str);
while (s < e) {
int c = rb_enc_ascget(s, e, &clen, str_enc);
int c = ASCGET(s, e, &clen);
char *ss;
if (c == -1) {
@ -3110,7 +3121,7 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
}
rb_enc_str_buf_cat(val, p, ss-p, str_enc);
c = rb_enc_ascget(s, e, &clen, str_enc);
c = ASCGET(s, e, &clen);
if (c == -1) {
s += mbclen(s, e, str_enc);
rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
@ -3132,12 +3143,12 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
break;
case 'k':
if (s < e && rb_enc_ascget(s, e, &clen, str_enc) == '<') {
if (s < e && ASCGET(s, e, &clen) == '<') {
char *name, *name_end;
name_end = name = s + clen;
while (name_end < e) {
c = rb_enc_ascget(name_end, e, &clen, str_enc);
c = ASCGET(name_end, e, &clen);
if (c == '>') break;
name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
}

View file

@ -70,7 +70,7 @@ typedef struct {
#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
#define enclen(enc,p,e) ONIGENC_MBC_ENC_LEN(enc,p,e)
#define enclen(enc,p,e) ((enc->max_enc_len == enc->min_enc_len) ? enc->min_enc_len : ONIGENC_MBC_ENC_LEN(enc,p,e))
/* character types bit flag */
#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)

View file

@ -2758,16 +2758,25 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
s = (UChar* )text;
if (enc->max_enc_len == enc->min_enc_len) {
int n = enc->max_enc_len;
while (s < end) {
if (*s == *target) {
p = s + 1;
t = target + 1;
if (memcmp(t, p, target_end - t) == 0)
return s;
}
s += n;
}
return (UChar*)NULL;
}
while (s < end) {
if (*s == *target) {
p = s + 1;
t = target + 1;
while (t < target_end) {
if (*t != *p++)
break;
t++;
}
if (t == target_end)
if (memcmp(t, p, target_end - t) == 0)
return s;
}
s += enclen(enc, s, end);

View file

@ -253,12 +253,12 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
#define PUNFETCH p = pfetch_prev
#define PINC do { \
pfetch_prev = p; \
p += ONIGENC_MBC_ENC_LEN(enc, p, end); \
p += enclen(enc, p, end); \
} while (0)
#define PFETCH(c) do { \
c = ONIGENC_MBC_TO_CODE(enc, p, end); \
c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
pfetch_prev = p; \
p += ONIGENC_MBC_ENC_LEN(enc, p, end); \
p += enclen(enc, p, end); \
} while (0)
#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)

View file

@ -256,7 +256,7 @@ rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc
static inline void
str_enc_copy(VALUE str1, VALUE str2)
{
rb_enc_internal_set_index(str1, ENCODING_GET(str2));
rb_enc_set_index(str1, ENCODING_GET(str2));
}
static void