mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
ObjectSpace.dump: Include string coderange
I suspect that some shared pages are invalidated because some static strings don't have their coderange set eagerly, so the first time they are scanned, the entire memory page is invalidated. Being able to see the coderange in `ObjectSpace` would help debug this. In addition, `dump` currently calls `is_broken_string()` and `is_ascii_string()`, which both end up scanning the string and assigning its coderange. I think that is undesirable, as `dump` should be read-only.
This commit is contained in:
parent
b92fb78696
commit
890df5f812
Notes:
git
2022-07-05 03:05:29 +09:00
2 changed files with 52 additions and 5 deletions
|
@ -313,6 +313,16 @@ reachable_object_i(VALUE ref, void *data)
|
|||
dc->cur_obj_references++;
|
||||
}
|
||||
|
||||
/*
 * Reports whether every one of the first `size` bytes at `str` has its
 * high bit (0x80) clear. Returns true when no such byte is found, which
 * includes the case where `size` is zero or negative (nothing is scanned).
 * The buffer is only read, never written.
 */
bool
dump_string_ascii_only(const char *str, long size)
{
    long pos = 0;

    while (pos < size) {
        /* Any byte with the top bit set is outside the 7-bit range. */
        if ((str[pos] & 0x80) != 0) {
            return false;
        }
        pos++;
    }

    return true;
}
|
||||
|
||||
static void
|
||||
dump_append_string_content(struct dump_config *dc, VALUE obj)
|
||||
{
|
||||
|
@ -323,9 +333,17 @@ dump_append_string_content(struct dump_config *dc, VALUE obj)
|
|||
dump_append_sizet(dc, rb_str_capacity(obj));
|
||||
}
|
||||
|
||||
if (is_ascii_string(obj)) {
|
||||
dump_append(dc, ", \"value\":");
|
||||
dump_append_string_value(dc, obj);
|
||||
if (RSTRING_LEN(obj) && rb_enc_asciicompat(rb_enc_from_index(ENCODING_GET(obj)))) {
|
||||
int cr = ENC_CODERANGE(obj);
|
||||
if (cr == RUBY_ENC_CODERANGE_UNKNOWN) {
|
||||
if (dump_string_ascii_only(RSTRING_PTR(obj), RSTRING_LEN(obj))) {
|
||||
cr = RUBY_ENC_CODERANGE_7BIT;
|
||||
}
|
||||
}
|
||||
if (cr == RUBY_ENC_CODERANGE_7BIT) {
|
||||
dump_append(dc, ", \"value\":");
|
||||
dump_append_string_value(dc, obj);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -389,8 +407,6 @@ dump_object(VALUE obj, struct dump_config *dc)
|
|||
case T_STRING:
|
||||
if (STR_EMBED_P(obj))
|
||||
dump_append(dc, ", \"embedded\":true");
|
||||
if (is_broken_string(obj))
|
||||
dump_append(dc, ", \"broken\":true");
|
||||
if (FL_TEST(obj, RSTRING_FSTR))
|
||||
dump_append(dc, ", \"fstring\":true");
|
||||
if (STR_SHARED_P(obj))
|
||||
|
@ -403,6 +419,27 @@ dump_object(VALUE obj, struct dump_config *dc)
|
|||
dump_append(dc, rb_enc_name(rb_enc_from_index(ENCODING_GET(obj))));
|
||||
dump_append(dc, "\"");
|
||||
}
|
||||
|
||||
dump_append(dc, ", \"coderange\":\"");
|
||||
switch (RB_ENC_CODERANGE(obj)) {
|
||||
case RUBY_ENC_CODERANGE_UNKNOWN:
|
||||
dump_append(dc, "unknown");
|
||||
break;
|
||||
case RUBY_ENC_CODERANGE_7BIT:
|
||||
dump_append(dc, "7bit");
|
||||
break;
|
||||
case RUBY_ENC_CODERANGE_VALID:
|
||||
dump_append(dc, "valid");
|
||||
break;
|
||||
case RUBY_ENC_CODERANGE_BROKEN:
|
||||
dump_append(dc, "broken");
|
||||
break;
|
||||
}
|
||||
dump_append(dc, "\"");
|
||||
|
||||
if (RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_BROKEN)
|
||||
dump_append(dc, ", \"broken\":true");
|
||||
|
||||
break;
|
||||
|
||||
case T_HASH:
|
||||
|
|
|
@ -486,6 +486,16 @@ class TestObjSpace < Test::Unit::TestCase
|
|||
end
|
||||
end
|
||||
|
||||
# Checks that ObjectSpace.dump reports a "coderange" entry for strings,
# covering the "7bit", "unknown", "valid" and "broken" values.
def test_dump_string_coderange
  ascii_dump = ObjectSpace.dump("TEST STRING")
  assert_includes ascii_dump, '"coderange":"7bit"'

  # This string is expected to report an unknown coderange after
  # force_encoding; it is dumped twice so that a dump which mutated the
  # string (by computing its coderange) would fail on the second pass.
  binary = "TEST STRING".dup.force_encoding(Encoding::BINARY)
  2.times do
    binary_dump = ObjectSpace.dump(binary)
    assert_includes binary_dump, '"coderange":"unknown"'
  end

  multibyte_dump = ObjectSpace.dump("Fée")
  assert_includes multibyte_dump, '"coderange":"valid"'

  invalid_dump = ObjectSpace.dump("\xFF")
  assert_includes invalid_dump, '"coderange":"broken"'
end
|
||||
|
||||
def test_dump_escapes_method_name
|
||||
method_name = "foo\"bar"
|
||||
klass = Class.new do
|
||||
|
|
Loading…
Reference in a new issue