1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

rb_str_buf_append: add a fast path for ENC_CODERANGE_VALID

If the RHS has valid encoding, and both strings have the same
encoding, we can use the fast path.

However we need to update the LHS coderange.

```
compare-ruby: ruby 3.2.0dev (2022-07-21T14:46:32Z master cdbb9b8555) [arm64-darwin21]
built-ruby: ruby 3.2.0dev (2022-07-25T07:25:41Z string-concat-vali.. 11a2772bdd) [arm64-darwin21]
warming up...

|                    |compare-ruby|built-ruby|
|:-------------------|-----------:|---------:|
|binary_concat_7bit  |    554.816k|  556.460k|
|                    |           -|     1.00x|
|utf8_concat_7bit    |    556.367k|  555.101k|
|                    |       1.00x|         -|
|utf8_concat_UTF8    |    412.555k|  556.824k|
|                    |           -|     1.35x|
```
This commit is contained in:
Jean Boussier 2022-07-21 17:08:51 +02:00
parent f61dd38e5c
commit 31a5586d1e
Notes: git 2022-07-25 21:19:16 +09:00
2 changed files with 31 additions and 16 deletions

View file

@ -1,9 +1,9 @@
prelude: |
CHUNK = "a" * 64
BCHUNK = "a".b * 64
UCHUNK = "é" * 32
GC.disable # GC causes a lot of variance
benchmark:
binary_concat_utf8: |
binary_concat_7bit: |
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
@ -13,17 +13,7 @@ benchmark:
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
binary_concat_binary: |
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
utf8_concat_utf8: |
utf8_concat_7bit: |
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
@ -33,3 +23,13 @@ benchmark:
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
utf8_concat_UTF8: |
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK

View file

@ -3329,9 +3329,24 @@ VALUE
rb_str_buf_append(VALUE str, VALUE str2)
{
int str2_cr = rb_enc_str_coderange(str2);
if (str2_cr == ENC_CODERANGE_7BIT && str_enc_fastpath(str)) {
if (str_enc_fastpath(str)) {
switch (str2_cr) {
case ENC_CODERANGE_7BIT:
// If RHS is 7bit we can do simple concatenation
str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
return str;
case ENC_CODERANGE_VALID:
// If RHS is valid, we can do simple concatenation if encodings are the same
if (ENCODING_GET_INLINED(str) == ENCODING_GET_INLINED(str2)) {
str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
int str_cr = ENC_CODERANGE(str);
if (UNLIKELY(str_cr != ENC_CODERANGE_VALID)) {
ENC_CODERANGE_SET(str, RB_ENC_CODERANGE_AND(str_cr, str2_cr));
}
return str;
}
}
}
rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),