/* Mirror of https://github.com/ruby/ruby.git (synced 2022-11-09): ext/objspace/objspace_dump.c */

/**********************************************************************
objspace_dump.c - Heap dumping ObjectSpace extender for MRI.
$Author$
created at: Sat Oct 11 10:11:00 2013
NOTE: This extension library is not expected to exist outside of C Ruby.
All the files in this distribution are covered under the Ruby's
license (see the file COPYING).
**********************************************************************/
#include "gc.h"
#include "internal.h"
#include "internal/hash.h"
#include "internal/string.h"
#include "node.h"
#include "objspace.h"
#include "ruby/debug.h"
#include "ruby/util.h"
#include "ruby/io.h"
#include "vm_core.h"
/* Hex digit lookup table defined elsewhere; dump_append_ref indexes it with a 4-bit value. */
RUBY_EXTERN const char ruby_hexdigits[];
/* Symbols for the :output option key and its :stdout/:string/:file values; set in Init_objspace_dump. */
static VALUE sym_output, sym_stdout, sym_string, sym_file;
/* Symbols for the :full and :since option keys; set in Init_objspace_dump. */
static VALUE sym_full, sym_since;
/* Size in bytes of the staging buffer embedded in struct dump_config. */
#define BUFFER_CAPACITY 4096
/*
 * State shared by every dump helper in this file: the output destination,
 * bookkeeping for the object currently being written, the options chosen by
 * the caller, and a fixed-size staging buffer that dump_flush drains.
 */
struct dump_config {
/* Not referenced in the visible part of this file. NOTE(review): confirm whether it is still needed. */
VALUE type;
/* IO the buffer is written to with rb_io_bufwrite; zero when dumping to a String. */
VALUE stream;
/* String the buffer is appended to with rb_str_cat when no stream is set. */
VALUE string;
/* Category passed to the previous root_obj_i call; NULL before the first one. */
const char *root_category;
/* Object currently being dumped by dump_object. */
VALUE cur_obj;
/* Class of cur_obj (0 for T_NODE); reachable_object_i skips references equal to it. */
VALUE cur_obj_klass;
/* Number of references emitted so far for cur_obj. */
size_t cur_obj_references;
/* Set to 1 once root_obj_i has emitted at least one root. */
unsigned int roots: 1;
/* Set when the :full option is true; heap_i then dumps slots whose flags are zero too. */
unsigned int full_heap: 1;
/* Non-zero when a truthy :since option was given. */
unsigned int partial_dump;
/* Generation threshold: dump_object skips objects whose allocation generation is lower. */
size_t since;
/* Number of bytes currently pending in buffer. */
unsigned long buffer_len;
/* Staging area for output that has not been flushed yet. */
char buffer[BUFFER_CAPACITY];
};
/*
 * Drain the staging buffer of +dc+ into its destination.
 *
 * With a stream set, the pending bytes are handed to rb_io_bufwrite; if only
 * part of them was accepted, the remainder is moved to the front of the
 * buffer and kept for a later flush. Without a stream, the bytes are appended
 * to dc->string when one is set. Does nothing when the buffer is empty.
 */
static void
dump_flush(struct dump_config *dc)
{
    if (dc->buffer_len == 0) {
        return;
    }

    if (dc->stream) {
        const size_t accepted = rb_io_bufwrite(dc->stream, dc->buffer, dc->buffer_len);

        if (accepted < dc->buffer_len) {
            /* short write: keep the unwritten tail at the start of the buffer */
            const unsigned long leftover = dc->buffer_len - accepted;

            MEMMOVE(dc->buffer, dc->buffer + accepted, char, leftover);
            dc->buffer_len = leftover;
            return;
        }
    }
    else if (dc->string) {
        rb_str_cat(dc->string, dc->buffer, dc->buffer_len);
    }

    dc->buffer_len = 0;
}
/*
 * Make sure +requested+ more bytes fit in the staging buffer of +dc+,
 * flushing it once if they currently do not. Raises IOError ("full buffer")
 * when there is still not enough room after the flush.
 */
static inline void
buffer_ensure_capa(struct dump_config *dc, unsigned long requested)
{
    RUBY_ASSERT(requested <= BUFFER_CAPACITY);

    if (dc->buffer_len + requested < BUFFER_CAPACITY) {
        return;
    }

    dump_flush(dc);

    if (dc->buffer_len + requested < BUFFER_CAPACITY) {
        return;
    }

    /* the flush could not free enough space (e.g. a short write) */
    rb_raise(rb_eIOError, "full buffer");
}
/*
 * Copy +len+ bytes starting at +cstr+ to the end of the staging buffer of
 * +dc+, flushing first when needed. A zero +len+ is a no-op.
 */
static void
buffer_append(struct dump_config *dc, const char *cstr, unsigned long len)
{
    if (!LIKELY(len > 0)) {
        return;
    }

    buffer_ensure_capa(dc, len);

    /* compute the destination only after the possible flush above */
    char *const tail = dc->buffer + dc->buffer_len;
    MEMCPY(tail, cstr, char, len);
    dc->buffer_len += len;
}
/*
 * Append the NUL-terminated C string +str+ to the dump buffer of +dc+.
 * +str+ is expanded twice, so it must not have side effects.
 */
#define dump_append(dc, str) \
    buffer_append((dc), (str), (long)strlen(str))
/*
 * Append +number+ in decimal ("%ld") to the dump buffer of +dc+.
 */
static void
dump_append_ld(struct dump_config *dc, const long number)
{
    /* space reserved for the text; presumably digits + sign + NUL -- confirm against DECIMAL_SIZE_OF_BITS */
    const int width = DECIMAL_SIZE_OF_BITS(sizeof(number) * CHAR_BIT - 1) + 2;
    char *dest;
    unsigned long written;

    buffer_ensure_capa(dc, width);
    dest = dc->buffer + dc->buffer_len;
    written = snprintf(dest, width, "%ld", number);
    RUBY_ASSERT(written <= width);
    dc->buffer_len += written;
}
/*
 * Append +number+ in decimal ("%lu") to the dump buffer of +dc+.
 */
static void
dump_append_lu(struct dump_config *dc, const unsigned long number)
{
    /* space reserved for the text; presumably digits + NUL -- confirm against DECIMAL_SIZE_OF_BITS */
    const int width = DECIMAL_SIZE_OF_BITS(sizeof(number) * CHAR_BIT) + 1;
    char *dest;
    unsigned long written;

    buffer_ensure_capa(dc, width);
    dest = dc->buffer + dc->buffer_len;
    written = snprintf(dest, width, "%lu", number);
    RUBY_ASSERT(written <= width);
    dc->buffer_len += written;
}
/*
 * Append +number+ formatted with "%#g" to the dump buffer of +dc+.
 *
 * The value is first formatted into whatever space is left; if snprintf
 * reports that it did not fit, room is made with buffer_ensure_capa and the
 * value is formatted a second time.
 */
static void
dump_append_g(struct dump_config *dc, const double number)
{
    unsigned long room = BUFFER_CAPACITY - dc->buffer_len;
    const unsigned long needed = snprintf(dc->buffer + dc->buffer_len, room, "%#g", number);

    if (needed >= room) {
        /* output was truncated: free space, then redo the formatting */
        buffer_ensure_capa(dc, needed);
        room = BUFFER_CAPACITY - dc->buffer_len;
        snprintf(dc->buffer + dc->buffer_len, room, "%#g", number);
    }

    dc->buffer_len += needed;
}
/*
 * Append +number+ in decimal ("%d") to the dump buffer of +dc+.
 */
static void
dump_append_d(struct dump_config *dc, const int number)
{
    /* space reserved for the text; presumably digits + sign + NUL -- confirm against DECIMAL_SIZE_OF_BITS */
    const int width = DECIMAL_SIZE_OF_BITS(sizeof(number) * CHAR_BIT - 1) + 2;
    char *dest;
    unsigned long written;

    buffer_ensure_capa(dc, width);
    dest = dc->buffer + dc->buffer_len;
    written = snprintf(dest, width, "%d", number);
    RUBY_ASSERT(written <= width);
    dc->buffer_len += written;
}
/*
 * Append the size_t +number+ in decimal (PRIuSIZE) to the dump buffer of +dc+.
 */
static void
dump_append_sizet(struct dump_config *dc, const size_t number)
{
    /* space reserved for the text; presumably digits + NUL -- confirm against DECIMAL_SIZE_OF_BITS */
    const int width = DECIMAL_SIZE_OF_BITS(sizeof(number) * CHAR_BIT) + 1;
    char *dest;
    unsigned long written;

    buffer_ensure_capa(dc, width);
    dest = dc->buffer + dc->buffer_len;
    written = snprintf(dest, width, "%"PRIuSIZE, number);
    RUBY_ASSERT(written <= width);
    dc->buffer_len += written;
}
/*
 * Append a single byte of a JSON string body to the dump buffer of +dc+.
 *
 * Bytes 0x00..0x1f are written as a "\u00XX" escape; every other byte is
 * copied unchanged.
 */
static void
dump_append_c(struct dump_config *dc, char c)
{
    /*
     * Plain char may be signed. Without this conversion a byte >= 0x80 is
     * negative, passes the "<= 0x1f" test and is formatted as a sign-extended
     * int, which needs more characters than +width+ reserves.
     */
    const unsigned char byte = (unsigned char)c;

    if (byte <= 0x1f) {
        /* "\u00" + two hex digits + NUL */
        const int width = (sizeof(c) * CHAR_BIT / 4) + 5;
        buffer_ensure_capa(dc, width);
        unsigned long required = snprintf(dc->buffer + dc->buffer_len, width, "\\u00%02x", (unsigned int)byte);
        RUBY_ASSERT(required <= width);
        dc->buffer_len += required;
    }
    else {
        buffer_ensure_capa(dc, 1);
        dc->buffer[dc->buffer_len] = c;
        dc->buffer_len++;
    }
}
/*
 * Append +ref+ as a double-quoted hexadecimal literal ("0x...") to the dump
 * buffer of +dc+. +ref+ is expected to be non-zero.
 */
static void
dump_append_ref(struct dump_config *dc, VALUE ref)
{
    RUBY_ASSERT(ref > 0);

    /* one slot per hex digit of a VALUE, plus the two quotes and the "0x" prefix */
    char text[((sizeof(VALUE) * CHAR_BIT + 3) / 4) + 4];
    size_t pos = sizeof(text);

    /* the text is produced right to left, starting with the closing quote */
    text[--pos] = '"';
    for (; ref != 0; ref >>= 4) {
        text[--pos] = ruby_hexdigits[ref & 0xF];
    }
    text[--pos] = 'x';
    text[--pos] = '0';
    text[--pos] = '"';

    buffer_append(dc, text + pos, sizeof(text) - pos);
}
/*
 * Append the bytes of the Ruby String +obj+ as a double-quoted JSON string
 * to the dump buffer of +dc+.
 *
 * Backslash, double quote, NUL, \b, \t, \f, \n, \r and DEL get dedicated
 * escapes; all remaining bytes go through dump_append_c.
 */
static void
dump_append_string_value(struct dump_config *dc, VALUE obj)
{
    long i;
    char c;
    const char *value;

    dump_append(dc, "\"");
    for (i = 0, value = RSTRING_PTR(obj); i < RSTRING_LEN(obj); i++) {
        switch ((c = value[i])) {
          case '\\':
            dump_append(dc, "\\\\");
            /* must not fall through: that would also emit an escaped quote */
            break;
          case '"':
            dump_append(dc, "\\\"");
            break;
          case '\0':
            dump_append(dc, "\\u0000");
            break;
          case '\b':
            dump_append(dc, "\\b");
            break;
          case '\t':
            dump_append(dc, "\\t");
            break;
          case '\f':
            dump_append(dc, "\\f");
            break;
          case '\n':
            dump_append(dc, "\\n");
            break;
          case '\r':
            dump_append(dc, "\\r");
            break;
          case '\177':
            dump_append(dc, "\\u007f");
            break;
          default:
            dump_append_c(dc, c);
            break;
        }
    }
    dump_append(dc, "\"");
}
/*
 * Append the Symbol +obj+ as a small JSON object of the form
 * {"type":"SYMBOL", "value":"<name>"} to the dump buffer of +dc+.
 */
static void
dump_append_symbol_value(struct dump_config *dc, VALUE obj)
{
    dump_append(dc, "{\"type\":\"SYMBOL\", \"value\":");

    /* the symbol's name, converted with rb_sym2str, is written as an escaped JSON string */
    const VALUE name = rb_sym2str(obj);
    dump_append_string_value(dc, name);

    dump_append(dc, "}");
}
/*
 * Return the name of the builtin type of +obj+ without the "T_" prefix
 * (for example "STRING" for T_STRING), or "UNKNOWN" when the type is not
 * one of the cases listed below.
 */
static inline const char *
obj_type(VALUE obj)
{
    switch (BUILTIN_TYPE(obj)) {
#define CASE_TYPE(type) case T_##type: return #type
        CASE_TYPE(NONE);
        CASE_TYPE(NIL);
        CASE_TYPE(OBJECT);
        CASE_TYPE(CLASS);
        CASE_TYPE(ICLASS);
        CASE_TYPE(MODULE);
        CASE_TYPE(FLOAT);
        CASE_TYPE(STRING);
        CASE_TYPE(REGEXP);
        CASE_TYPE(ARRAY);
        CASE_TYPE(HASH);
        CASE_TYPE(STRUCT);
        CASE_TYPE(BIGNUM);
        CASE_TYPE(FILE);
        CASE_TYPE(FIXNUM);
        CASE_TYPE(TRUE);
        CASE_TYPE(FALSE);
        CASE_TYPE(DATA);
        CASE_TYPE(MATCH);
        CASE_TYPE(SYMBOL);
        CASE_TYPE(RATIONAL);
        CASE_TYPE(COMPLEX);
        CASE_TYPE(IMEMO);
        CASE_TYPE(UNDEF);
        CASE_TYPE(NODE);
        CASE_TYPE(ZOMBIE);
#undef CASE_TYPE
      default: break;
    }
    return "UNKNOWN";
}
/*
 * Append the JSON form of the special constant +value+ to the dump buffer
 * of +dc+: true, false, null, a decimal integer for a Fixnum, a "%#g" number
 * for a flonum, a SYMBOL object for a Symbol, and "{}" for anything else.
 */
static void
dump_append_special_const(struct dump_config *dc, VALUE value)
{
    if (value == Qtrue) {
        dump_append(dc, "true");
        return;
    }
    if (value == Qfalse) {
        dump_append(dc, "false");
        return;
    }
    if (value == Qnil) {
        dump_append(dc, "null");
        return;
    }
    if (FIXNUM_P(value)) {
        dump_append_ld(dc, FIX2LONG(value));
        return;
    }
    if (FLONUM_P(value)) {
        dump_append_g(dc, RFLOAT_VALUE(value));
        return;
    }
    if (SYMBOL_P(value)) {
        dump_append_symbol_value(dc, value);
        return;
    }

    /* no dedicated representation for this kind of special constant */
    dump_append(dc, "{}");
}
/*
 * Callback handed to rb_objspace_reachable_objects_from by dump_object.
 * Appends +ref+ to the "references" array of the object being dumped,
 * opening the array on the first call. References equal to the object's
 * class are skipped. +data+ is the struct dump_config of the dump.
 */
static void
reachable_object_i(VALUE ref, void *data)
{
    struct dump_config *const dc = data;
    const char *lead;

    if (ref == dc->cur_obj_klass) {
        return;
    }

    /* the first reference opens the array, later ones are comma separated */
    lead = (dc->cur_obj_references == 0) ? ", \"references\":[" : ", ";
    dump_append(dc, lead);
    dump_append_ref(dc, ref);

    dc->cur_obj_references++;
}
/*
 * Append the String-specific members for +obj+ to the dump buffer of +dc+:
 * always "bytesize"; "capacity" when the string is neither embedded nor
 * shared and its capacity differs from its length; and "value" when
 * is_ascii_string(obj) holds.
 */
static void
dump_append_string_content(struct dump_config *dc, VALUE obj)
{
    const long bytesize = RSTRING_LEN(obj);

    dump_append(dc, ", \"bytesize\":");
    dump_append_ld(dc, bytesize);

    if (!STR_EMBED_P(obj) && !STR_SHARED_P(obj)) {
        const size_t capacity = rb_str_capacity(obj);

        if ((long)capacity != bytesize) {
            dump_append(dc, ", \"capacity\":");
            dump_append_sizet(dc, capacity);
        }
    }

    if (is_ascii_string(obj)) {
        dump_append(dc, ", \"value\":");
        dump_append_string_value(dc, obj);
    }
}
/*
 * Append the JSON description of +obj+ to the dump buffer of +dc+.
 *
 * Special constants are written as bare values by dump_append_special_const.
 * Any other object becomes one "{...}\n" record holding its address, type,
 * class, type-specific members, outgoing references, allocation info (when
 * objspace_lookup_allocation_info returns an entry), memsize and GC flags.
 *
 * Nothing is written when a partial dump is active and the object has no
 * allocation info or was allocated in a generation older than dc->since,
 * nor when +obj+ is the String the dump is being accumulated into.
 */
static void
dump_object(VALUE obj, struct dump_config *dc)
{
    size_t memsize;
    struct allocation_info *ainfo = objspace_lookup_allocation_info(obj);
    rb_io_t *fptr;
    ID flags[RB_OBJ_GC_FLAGS_MAX];
    size_t n, i;

    if (SPECIAL_CONST_P(obj)) {
        dump_append_special_const(dc, obj);
        return;
    }

    dc->cur_obj = obj;
    dc->cur_obj_references = 0;
    dc->cur_obj_klass = BUILTIN_TYPE(obj) == T_NODE ? 0 : RBASIC_CLASS(obj);

    if (dc->partial_dump && (!ainfo || ainfo->generation < dc->since)) {
        return;
    }

    /* do not describe the output String itself while appending to it */
    if (dc->cur_obj == dc->string)
        return;

    dump_append(dc, "{\"address\":");
    dump_append_ref(dc, obj);

    dump_append(dc, ", \"type\":\"");
    dump_append(dc, obj_type(obj));
    dump_append(dc, "\"");

    if (dc->cur_obj_klass) {
        dump_append(dc, ", \"class\":");
        dump_append_ref(dc, dc->cur_obj_klass);
    }
    if (rb_obj_frozen_p(obj))
        dump_append(dc, ", \"frozen\":true");

    switch (BUILTIN_TYPE(obj)) {
      case T_NONE:
        /* the record ends here: no references, allocation info or flags */
        dump_append(dc, "}\n");
        return;

      case T_IMEMO:
        dump_append(dc, ", \"imemo_type\":\"");
        dump_append(dc, rb_imemo_name(imemo_type(obj)));
        dump_append(dc, "\"");
        break;

      case T_SYMBOL:
        dump_append_string_content(dc, rb_sym2str(obj));
        break;

      case T_STRING:
        if (STR_EMBED_P(obj))
            dump_append(dc, ", \"embedded\":true");
        if (is_broken_string(obj))
            dump_append(dc, ", \"broken\":true");
        if (FL_TEST(obj, RSTRING_FSTR))
            dump_append(dc, ", \"fstring\":true");
        if (STR_SHARED_P(obj))
            dump_append(dc, ", \"shared\":true");
        else
            dump_append_string_content(dc, obj);

        if (!ENCODING_IS_ASCII8BIT(obj)) {
            dump_append(dc, ", \"encoding\":\"");
            dump_append(dc, rb_enc_name(rb_enc_from_index(ENCODING_GET(obj))));
            dump_append(dc, "\"");
        }
        break;

      case T_HASH:
        dump_append(dc, ", \"size\":");
        dump_append_sizet(dc, (size_t)RHASH_SIZE(obj));
        if (FL_TEST(obj, RHASH_PROC_DEFAULT)) {
            dump_append(dc, ", \"default\":");
            dump_append_ref(dc, RHASH_IFNONE(obj));
        }
        break;

      case T_ARRAY:
        dump_append(dc, ", \"length\":");
        dump_append_ld(dc, RARRAY_LEN(obj));
        if (RARRAY_LEN(obj) > 0 && FL_TEST(obj, ELTS_SHARED))
            dump_append(dc, ", \"shared\":true");
        if (RARRAY_LEN(obj) > 0 && FL_TEST(obj, RARRAY_EMBED_FLAG))
            dump_append(dc, ", \"embedded\":true");
        break;

      case T_CLASS:
      case T_MODULE:
        if (dc->cur_obj_klass) {
            VALUE mod_name = rb_mod_name(obj);
            if (!NIL_P(mod_name)) {
                dump_append(dc, ", \"name\":\"");
                dump_append(dc, RSTRING_PTR(mod_name));
                dump_append(dc, "\"");
            }
        }
        break;

      case T_DATA:
        if (RTYPEDDATA_P(obj)) {
            dump_append(dc, ", \"struct\":\"");
            dump_append(dc, RTYPEDDATA_TYPE(obj)->wrap_struct_name);
            dump_append(dc, "\"");
        }
        break;

      case T_FLOAT:
        dump_append(dc, ", \"value\":\"");
        dump_append_g(dc, RFLOAT_VALUE(obj));
        dump_append(dc, "\"");
        break;

      case T_OBJECT:
        dump_append(dc, ", \"ivars\":");
        dump_append_lu(dc, ROBJECT_NUMIV(obj));
        break;

      case T_FILE:
        fptr = RFILE(obj)->fptr;
        if (fptr) {
            dump_append(dc, ", \"fd\":");
            dump_append_d(dc, fptr->fd);
        }
        break;

      case T_ZOMBIE:
        /* the record ends here, as for T_NONE */
        dump_append(dc, "}\n");
        return;

      default:
        break;
    }

    /* reachable_object_i opens the "references" array on its first call */
    rb_objspace_reachable_objects_from(obj, reachable_object_i, dc);
    if (dc->cur_obj_references > 0)
        dump_append(dc, "]");

    if (ainfo) {
        /* dump_append calls strlen on its argument, so a NULL path must be skipped */
        if (ainfo->path) {
            dump_append(dc, ", \"file\":\"");
            dump_append(dc, ainfo->path);
            dump_append(dc, "\"");
        }
        dump_append(dc, ", \"line\":");
        dump_append_lu(dc, ainfo->line);
        if (RTEST(ainfo->mid)) {
            VALUE m = rb_sym2str(ainfo->mid);
            dump_append(dc, ", \"method\":");
            dump_append_string_value(dc, m);
        }
        dump_append(dc, ", \"generation\":");
        dump_append_sizet(dc, ainfo->generation);
    }

    if ((memsize = rb_obj_memsize_of(obj)) > 0) {
        dump_append(dc, ", \"memsize\":");
        dump_append_sizet(dc, memsize);
    }

    /* the limit is the number of entries in flags, not its size in bytes */
    if ((n = rb_obj_gc_flags(obj, flags, RB_OBJ_GC_FLAGS_MAX)) > 0) {
        dump_append(dc, ", \"flags\":{");
        for (i=0; i<n; i++) {
            dump_append(dc, "\"");
            dump_append(dc, rb_id2name(flags[i]));
            dump_append(dc, "\":true");
            if (i != n-1) dump_append(dc, ", ");
        }
        dump_append(dc, "}");
    }

    dump_append(dc, "}\n");
}
static int
heap_i(void *vstart, void *vend, size_t stride, void *data)
{
struct dump_config *dc = (struct dump_config *)data;
VALUE v = (VALUE)vstart;
for (; v != (VALUE)vend; v += stride) {
if (dc->full_heap || RBASIC(v)->flags)
dump_object(v, dc);
}
return 0;
}
/*
 * Callback handed to rb_objspace_reachable_objects_from_root by
 * objspace_dump_all. Roots are grouped into one ROOT record per category:
 * a call whose +category+ pointer differs from the previous one closes the
 * open record (if any) and starts a new one; otherwise +obj+ is appended to
 * the current "references" array. The last record is left open for the
 * caller to close. +data+ is the struct dump_config of the dump.
 */
static void
root_obj_i(const char *category, VALUE obj, void *data)
{
    struct dump_config *const dc = data;
    const char *const previous = dc->root_category;

    /* categories are compared by pointer, not by content */
    if (previous == NULL || category != previous) {
        if (previous != NULL) {
            dump_append(dc, "]}\n");
        }
        dump_append(dc, "{\"type\":\"ROOT\", \"root\":\"");
        dump_append(dc, category);
        dump_append(dc, "\", \"references\":[");
    }
    else {
        dump_append(dc, ", ");
    }
    dump_append_ref(dc, obj);

    dc->root_category = category;
    dc->roots = 1;
}
/*
 * Configure the output destination and options of +dc+.
 *
 * +opts+   keyword hash (or nil) that may carry :output, :full and :since.
 * +output+ destination used when +opts+ has no :output key.
 * +filename+ basename used for the temporary file of the :file destination.
 *
 * Accepted destinations are :stdout, :file, :string, nil (treated as :file)
 * and any object rb_io_check_io accepts. Raises ArgumentError otherwise.
 *
 * Returns the effective destination: :stdout, :string, or :file for every
 * IO-backed dump, which is what dump_result expects.
 */
static VALUE
dump_output(struct dump_config *dc, VALUE opts, VALUE output, const char *filename)
{
    VALUE tmp;

    dc->full_heap = 0;
    dc->buffer_len = 0;

    if (RTEST(opts)) {
        /* keep the caller's default when :output is not among the options */
        output = rb_hash_lookup2(opts, sym_output, output);

        if (Qtrue == rb_hash_lookup2(opts, sym_full, Qfalse))
            dc->full_heap = 1;

        VALUE since = rb_hash_aref(opts, sym_since);
        if (RTEST(since)) {
            dc->partial_dump = 1;
            dc->since = NUM2SIZET(since);
        }
        else {
            dc->partial_dump = 0;
        }
    }

    if (output == sym_stdout) {
        dc->stream = rb_stdout;
        dc->string = Qnil;
    }
    else if (output == sym_file || output == Qnil) {
        rb_require("tempfile");
        tmp = rb_assoc_new(rb_str_new_cstr(filename), rb_str_new_cstr(".json"));
        tmp = rb_funcallv(rb_path2class("Tempfile"), rb_intern("create"), 1, &tmp);
        /* report nil as :file so that dump_result flushes and returns the IO */
        output = sym_file;
      io:
        dc->string = Qnil;
        dc->stream = rb_io_get_write_io(tmp);
    }
    else if (output == sym_string) {
        dc->string = rb_str_new_cstr("");
    }
    else if (!NIL_P(tmp = rb_io_check_io(output))) {
        output = sym_file;
        goto io;
    }
    else {
        rb_raise(rb_eArgError, "wrong output option: %"PRIsVALUE, output);
    }
    return output;
}
/*
 * Flush whatever is still buffered in +dc+ and return the value the Ruby
 * caller gets back: the flushed IO for :file, a copy of the accumulated
 * String (via rb_str_resurrect) for :string, and nil for anything else.
 */
static VALUE
dump_result(struct dump_config *dc, VALUE output)
{
    dump_flush(dc);

    if (output == sym_file) {
        rb_io_flush(dc->stream);
        return dc->stream;
    }
    if (output == sym_string) {
        return rb_str_resurrect(dc->string);
    }
    return Qnil;
}
/*
 *  call-seq:
 *    ObjectSpace.dump(obj[, output: :string]) # => "{ ... }"
 *    ObjectSpace.dump(obj, output: :file)     # => #<File:/tmp/rubyobj20131125-88733-1xkfmpv.json>
 *    ObjectSpace.dump(obj, output: :stdout)   # => nil
 *
 *  Dump the contents of a ruby object as JSON.
 *
 *  This method is only expected to work with C Ruby.
 *  It is experimental and subject to change: neither the function
 *  signature nor the output format is guaranteed to stay compatible
 *  in future versions of ruby.
 */
static VALUE
objspace_dump(int argc, VALUE *argv, VALUE os)
{
    static const char filename[] = "rubyobj";
    struct dump_config dc = {0,};
    VALUE obj = Qnil;
    VALUE opts = Qnil;
    VALUE output;

    /* one required positional argument plus an optional keyword hash */
    rb_scan_args(argc, argv, "1:", &obj, &opts);

    output = dump_output(&dc, opts, sym_string, filename);
    dump_object(obj, &dc);

    return dump_result(&dc, output);
}
/*
 *  call-seq:
 *    ObjectSpace.dump_all([output: :file]) # => #<File:/tmp/rubyheap20131125-88469-laoj3v.json>
 *    ObjectSpace.dump_all(output: :stdout) # => nil
 *    ObjectSpace.dump_all(output: :string) # => "{...}\n{...}\n..."
 *    ObjectSpace.dump_all(output:
 *      File.open('heap.json','w'))         # => #<File:heap.json>
 *    ObjectSpace.dump_all(output: :string,
 *      since: 42)                          # => "{...}\n{...}\n..."
 *
 *  Dump the contents of the ruby heap as JSON.
 *
 *  _since_ must be a non-negative integer or +nil+.
 *
 *  When _since_ is a positive integer, only objects of that generation and
 *  of newer generations are dumped. The current generation is available
 *  through GC::count.
 *
 *  Objects allocated while object allocation tracing was not enabled are
 *  ignored in that case. See ::trace_object_allocations for more
 *  information and examples.
 *
 *  When _since_ is omitted or +nil+, all objects are dumped.
 *
 *  This method is only expected to work with C Ruby.
 *  It is experimental and subject to change: neither the function
 *  signature nor the output format is guaranteed to stay compatible
 *  in future versions of ruby.
 */
static VALUE
objspace_dump_all(int argc, VALUE *argv, VALUE os)
{
    static const char filename[] = "rubyheap";
    struct dump_config dc = {0,};
    VALUE opts = Qnil;
    VALUE output;

    /* no positional arguments, only an optional keyword hash */
    rb_scan_args(argc, argv, "0:", &opts);
    output = dump_output(&dc, opts, sym_file, filename);

    /* roots are written unless a partial dump with a non-zero threshold was requested */
    const int with_roots = !dc.partial_dump || dc.since == 0;
    if (with_roots) {
        rb_objspace_reachable_objects_from_root(root_obj_i, &dc);
        if (dc.roots) {
            /* close the ROOT record that root_obj_i left open */
            dump_append(&dc, "]}\n");
        }
    }

    /* then every object found on the heap */
    rb_objspace_each_objects(heap_i, &dc);

    return dump_result(&dc, output);
}
/*
 * Register ObjectSpace.dump and ObjectSpace.dump_all on +rb_mObjSpace+ and
 * initialize the option symbols used by this file.
 */
void
Init_objspace_dump(VALUE rb_mObjSpace)
{
#undef rb_intern
#if 0
    rb_mObjSpace = rb_define_module("ObjectSpace"); /* let rdoc know */
#endif

    rb_define_module_function(rb_mObjSpace, "dump", objspace_dump, -1);
    rb_define_module_function(rb_mObjSpace, "dump_all", objspace_dump_all, -1);

#define INIT_SYM(name) sym_##name = ID2SYM(rb_intern(#name))
    INIT_SYM(output);
    INIT_SYM(stdout);
    INIT_SYM(string);
    INIT_SYM(since);
    INIT_SYM(file);
    INIT_SYM(full);
#undef INIT_SYM

    /* force create static IDs */
    rb_obj_gc_flags(rb_mObjSpace, 0, 0);
}