mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
rb_str_buf_append: fastpath to str_buf_cat
If the LHS is ASCII-compatible and the RHS is 7BIT,
we can concatenate directly without being concerned about
anything else.
Benchmark:
```
compare-ruby: ruby 3.2.0dev (2022-07-12T15:01:11Z master 71aec68566) [arm64-darwin21]
built-ruby: ruby 3.2.0dev (2022-07-13T10:13:53Z faster-buffer-conc.. a04c10476d) [arm64-darwin21]
warming up...
| |compare-ruby|built-ruby|
|:---------------------|-----------:|---------:|
|binary_append_utf8 | 385.315k| 573.663k|
| | -| 1.49x|
|binary_append_binary | 446.579k| 574.898k|
| | -| 1.29x|
|utf8_append_utf8 | 430.936k| 573.394k|
| | -| 1.33x|
```
Note that in the benchmark, the RHS always has a precomputed
coderange, so the benchmark never enters the slow path of having to
scan the RHS. However, it's extremely likely that we'd end
up scanning it anyway in rb_enc_cr_str_buf_cat.
This commit is contained in:
parent
ee1d2b276a
commit
0ae8dbbee0
Notes:
git
2022-07-19 17:42:07 +09:00
2 changed files with 42 additions and 5 deletions
|
@ -1,8 +1,29 @@
|
|||
prelude: |
|
||||
CHUNK = "a" * 64
|
||||
BCHUNK = "a".b * 64
|
||||
benchmark:
|
||||
string_concat: |
|
||||
buffer = String.new(capacity: 4096)
|
||||
binary_concat_utf8: |
|
||||
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
|
||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||
binary_concat_binary: |
|
||||
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
|
||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
||||
utf8_concat_utf8: |
|
||||
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
|
||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||
|
|
22
string.c
22
string.c
|
@ -3303,12 +3303,28 @@ rb_str_buf_cat_ascii(VALUE str, const char *ptr)
|
|||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
str_enc_fastpath(VALUE str)
|
||||
{
|
||||
// The overwhelming majority of strings are in one of these 3 encodings.
|
||||
switch (ENCODING_GET_INLINED(str)) {
|
||||
case ENCINDEX_ASCII_8BIT:
|
||||
case ENCINDEX_UTF_8:
|
||||
case ENCINDEX_US_ASCII:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
VALUE
|
||||
rb_str_buf_append(VALUE str, VALUE str2)
|
||||
{
|
||||
int str2_cr;
|
||||
|
||||
str2_cr = ENC_CODERANGE(str2);
|
||||
int str2_cr = rb_enc_str_coderange(str2);
|
||||
if (str2_cr == ENC_CODERANGE_7BIT && str_enc_fastpath(str)) {
|
||||
str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2));
|
||||
return str;
|
||||
}
|
||||
|
||||
rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
|
||||
ENCODING_GET(str2), str2_cr, &str2_cr);
|
||||
|
|
Loading…
Reference in a new issue