mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Cheaply derive code range for String#b return value
The result of String#b is a string with an ASCII_8BIT/BINARY encoding. That encoding is ASCII-compatible and has no byte sequences that are invalid for the encoding. If we know the receiver's code range, we can derive the resulting string's code range without needing to perform a full code range scan.
This commit is contained in:
parent
9e6d07f346
commit
9a8f6e392f
Notes:
git
2022-07-26 16:04:11 +09:00
1 changed file with 17 additions and 1 deletion
18
string.c
18
string.c
|
@@ -10779,7 +10779,23 @@ rb_str_b(VALUE str)
|
|||
str2 = str_alloc_embed(rb_cString, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
|
||||
}
|
||||
str_replace_shared_without_enc(str2, str);
|
||||
ENC_CODERANGE_CLEAR(str2);
|
||||
|
||||
// BINARY strings can never be broken; they're either 7-bit ASCII or VALID.
|
||||
// If we know the receiver's code range then we know the result's code range.
|
||||
int cr = ENC_CODERANGE(str);
|
||||
switch (cr) {
|
||||
case ENC_CODERANGE_7BIT:
|
||||
ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT);
|
||||
break;
|
||||
case ENC_CODERANGE_BROKEN:
|
||||
case ENC_CODERANGE_VALID:
|
||||
ENC_CODERANGE_SET(str2, ENC_CODERANGE_VALID);
|
||||
break;
|
||||
default:
|
||||
ENC_CODERANGE_CLEAR(str2);
|
||||
break;
|
||||
}
|
||||
|
||||
return str2;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue