From c958d9a9c6955e70297d93e8f9f86cf1b3196107 Mon Sep 17 00:00:00 2001
From: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Thu, 13 Dec 2007 18:00:50 +0000
Subject: [PATCH] * string.c (str_nth): direct jump if string is 7bit only. 
 great   performance boost for worst case.

* string.c (str_strlen): direct size if string is 7bit only.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14221 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 ChangeLog |  7 +++++++
 string.c  | 34 ++++++++++++++++++++++------------
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 18f6a2cb25..2515d638a6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Fri Dec 14 02:55:41 2007  Yukihiro Matsumoto  <matz@ruby-lang.org>
+
+	* string.c (str_nth): direct jump if string is 7bit only.  great
+	  performance boost for worst case.
+
+	* string.c (str_strlen): direct size if string is 7bit only.
+
 Fri Dec 14 02:29:32 2007  Yukihiro Matsumoto  <matz@ruby-lang.org>
 
 	* encoding.c (rb_enc_compatible): 1st argument (typically the
diff --git a/string.c b/string.c
index edbc8d6d6e..42c3ca51d6 100644
--- a/string.c
+++ b/string.c
@@ -93,6 +93,7 @@ VALUE rb_cSymbol;
 } while (0)
 
 #define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
+#define IS_7BIT(str) (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
 
 VALUE rb_fs;
 
@@ -472,6 +473,7 @@ str_strlen(VALUE str, rb_encoding *enc)
 {
     long len;
 
+    if (is_ascii_string(str)) return RSTRING_LEN(str);
     if (!enc) enc = rb_enc_get(str);
     len = rb_enc_strlen(RSTRING_PTR(str), RSTRING_END(str), enc);
     if (len < 0) {
@@ -750,9 +752,12 @@ rb_str_s_try_convert(VALUE dummy, VALUE str)
 }
 
 static char*
-str_nth(const char *p, const char *e, int nth, rb_encoding *enc)
+str_nth(const char *p, const char *e, int nth, rb_encoding *enc, int asc)
 {
-    p = rb_enc_nth(p, e, nth, enc);
+    if (asc)
+	p += nth;
+    else
+	p = rb_enc_nth(p, e, nth, enc);
     if (!p) {
 	rb_raise(rb_eArgError, "invalid mbstring sequence");
     }
@@ -763,9 +768,9 @@ str_nth(const char *p, const char *e, int nth, rb_encoding *enc)
 }
 
 static int
-str_offset(const char *p, const char *e, int nth, rb_encoding *enc)
+str_offset(const char *p, const char *e, int nth, rb_encoding *enc, int asc)
 {
-    const char *pp = str_nth(p, e, nth, enc);
+    const char *pp = str_nth(p, e, nth, enc, asc);
 
     return pp - p;
 }
@@ -811,6 +816,7 @@ rb_str_substr(VALUE str, long beg, long len)
     rb_encoding *enc = rb_enc_get(str);
     VALUE str2;
     char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
+    int asc = IS_7BIT(str);
 
     if (len < 0) return Qnil;
     if (!RSTRING_LEN(str)) {
@@ -839,7 +845,7 @@ rb_str_substr(VALUE str, long beg, long len)
     if (len == 0) {
 	p = 0;
     }
-    else if ((p = str_nth(s, e, beg, enc)) == e) {
+    else if ((p = str_nth(s, e, beg, enc, asc)) == e) {
 	len = 0;
     }
     else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
@@ -850,7 +856,7 @@ rb_str_substr(VALUE str, long beg, long len)
 	    len *= rb_enc_mbmaxlen(enc);
     }
     else {
-	len = str_offset(p, e, len, enc);
+	len = str_offset(p, e, len, enc, asc);
     }
   sub:
     str2 = rb_str_new5(str, p, len);
@@ -1432,7 +1438,7 @@ rb_str_index(VALUE str, VALUE sub, long offset)
     if (len - offset < slen) return -1;
     s = RSTRING_PTR(str);
     if (offset) {
-	s = str_nth(s, RSTRING_END(str), offset, enc);
+	s = str_nth(s, RSTRING_END(str), offset, enc, IS_7BIT(str));
 	offset = s - RSTRING_PTR(str);
     }
     if (slen == 0) return offset;
@@ -1530,6 +1536,7 @@ rb_str_rindex(VALUE str, VALUE sub, long pos)
     long len, slen;
     char *s, *sbeg, *e, *t;
     rb_encoding *enc;
+    int asc = IS_7BIT(str);
 
     enc = rb_enc_check(str, sub);
     len = str_strlen(str, enc);
@@ -1546,7 +1553,7 @@ rb_str_rindex(VALUE str, VALUE sub, long pos)
     e = RSTRING_END(str);
     t = RSTRING_PTR(sub);
     for (;;) {
-	s = str_nth(sbeg, e, pos, enc);
+	s = str_nth(sbeg, e, pos, enc, asc);
 	if (memcmp(s, t, slen) == 0) {
 	    return pos;
 	}
@@ -2087,6 +2094,7 @@ rb_str_splice(VALUE str, long beg, long len, VALUE val)
     long slen;
     char *p, *e;
     rb_encoding *enc;
+    int asc = IS_7BIT(str);
 
     if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
 
@@ -2108,8 +2116,8 @@ rb_str_splice(VALUE str, long beg, long len, VALUE val)
     if (slen < len || slen < beg + len) {
 	len = slen - beg;
     }
-    p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc);
-    e = str_nth(p, RSTRING_END(str), len, enc);
+    p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, asc);
+    e = str_nth(p, RSTRING_END(str), len, enc, asc);
     /* error check */
     beg = p - RSTRING_PTR(str);	/* physical position */
     len = e - p;		/* physical length */
@@ -4988,6 +4996,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
     char *p, *f = " ";
     long n, llen, rlen;
     volatile VALUE pad;
+    int asc = 1;
 
     rb_scan_args(argc, argv, "11", &w, &pad);
     enc = rb_enc_get(str);
@@ -4998,6 +5007,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
 	f = RSTRING_PTR(pad);
 	flen = RSTRING_LEN(pad);
 	fclen = str_strlen(pad, enc);
+	asc = is_ascii_string(pad);
 	if (flen == 0) {
 	    rb_raise(rb_eArgError, "zero width padding");
 	}
@@ -5020,7 +5030,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
 	    llen -= fclen;
 	}
 	else {
-	    char *fp = str_nth(f, f+flen, llen, enc);
+	    char *fp = str_nth(f, f+flen, llen, enc, asc);
 	    n = fp - f;
 	    memcpy(p,f,n);
 	    p+=n;
@@ -5040,7 +5050,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
 	    rlen -= fclen;
 	}
 	else {
-	    char *fp = str_nth(f, f+flen, rlen, enc);
+	    char *fp = str_nth(f, f+flen, rlen, enc, asc);
 	    n = fp - f;
 	    memcpy(p,f,n);
 	    p+=n;