diff --git a/inits.c b/inits.c index 13fa069236..c9687de516 100644 --- a/inits.c +++ b/inits.c @@ -61,6 +61,7 @@ rb_call_inits(void) CALL(Complex); CALL(version); CALL(vm_trace); + CALL(vm_stack_canary); CALL(ast); } #undef CALL diff --git a/insns.def b/insns.def index 897da9d0c5..cf9d980277 100644 --- a/insns.def +++ b/insns.def @@ -45,6 +45,9 @@ * handles_sp: If it is true, VM deals with sp in the insn. + * leaf: indicates that the instruction is "leaf" i.e. it does + not introduce new stack frame on top of it. Default true. + - Attributes can access operands, but not stack (push/pop) variables. - An instruction's body is a pure C block, copied verbatimly into @@ -203,6 +206,8 @@ getinstancevariable (ID id, IC ic) () (VALUE val) +/* "instance variable not initialized" warning can be hooked. */ +// attr bool leaf = false; /* has rb_warning() */ { val = vm_getinstancevariable(GET_SELF(), id, ic); } @@ -223,6 +228,8 @@ getclassvariable (ID id) () (VALUE val) +/* "class variable access from toplevel" warning can be hooked. */ +// attr bool leaf = false; /* has rb_warning() */ { val = rb_cvar_get(vm_get_cvar_base(rb_vm_get_cref(GET_EP()), GET_CFP()), id); } @@ -233,6 +240,8 @@ setclassvariable (ID id) (VALUE val) () +/* "class variable access from toplevel" warning can be hooked. */ +// attr bool leaf = false; /* has rb_warning() */ { vm_ensure_not_refinement_module(GET_SELF()); rb_cvar_set(vm_get_cvar_base(rb_vm_get_cref(GET_EP()), GET_CFP()), id, val); @@ -247,6 +256,8 @@ getconstant (ID id) (VALUE klass) (VALUE val) +/* getconstant can kick autoload */ +// attr bool leaf = false; /* has rb_autoload_load() */ { val = vm_get_ev_const(ec, klass, id, 0); } @@ -258,6 +269,11 @@ setconstant (ID id) (VALUE val, VALUE cbase) () +/* Assigning an object to a constant is basically a leaf operation. + * The problem is, assigning a Module instance to a constant _names_ + * that module. Naming involves string manipulations, which are + * method calls. */ +// attr bool leaf = false; /* has StringValue() */ { vm_check_if_namespace(cbase); vm_ensure_not_refinement_module(GET_SELF()); @@ -270,6 +286,7 @@ getglobal (GENTRY entry) () (VALUE val) +// attr bool leaf = leafness_of_getglobal(entry); { val = GET_GLOBAL((VALUE)entry); } @@ -280,6 +297,7 @@ setglobal (GENTRY entry) (VALUE val) () +// attr bool leaf = leafness_of_setglobal(entry); { SET_GLOBAL((VALUE)entry, val); } @@ -339,6 +357,7 @@ putiseq (ISEQ iseq) () (VALUE ret) +// attr bool leaf = true; /* yes it is */ { ret = (VALUE)iseq; } @@ -392,6 +411,9 @@ toregexp (rb_num_t opt, rb_num_t cnt) (...) (VALUE val) +/* This instruction has StringValue(), which is a method call. But it + * seems that path is never covered. */ +// attr bool leaf = true; /* yes it is */ // attr rb_snum_t sp_inc = 1 - cnt; { const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, STACK_ADDR_FROM_TOP(cnt)); @@ -444,6 +466,7 @@ expandarray (rb_num_t num, rb_num_t flag) (..., VALUE ary) (...) +// attr bool leaf = false; /* has rb_check_array_type() */ // attr rb_snum_t sp_inc = num - 1 + (flag & 1 ? 1 : 0); { vm_expandarray(GET_SP(), ary, num, (int)flag); @@ -455,6 +478,7 @@ concatarray () (VALUE ary1, VALUE ary2) (VALUE ary) +// attr bool leaf = false; /* has rb_check_array_type() */ { ary = vm_concat_array(ary1, ary2); } @@ -465,6 +489,7 @@ splatarray (VALUE flag) (VALUE ary) (VALUE obj) +// attr bool leaf = false; /* has rb_check_array_type() */ { obj = vm_splat_array(flag, ary); } @@ -475,6 +500,7 @@ newhash (rb_num_t num) (...) (VALUE val) +// attr bool leaf = false; /* has rb_hash_key_str() */ // attr rb_snum_t sp_inc = 1 - num; { RUBY_DTRACE_CREATE_HOOK(HASH, num); @@ -492,6 +518,8 @@ newrange (rb_num_t flag) (VALUE low, VALUE high) (VALUE val) +/* rb_range_new() exercises "bad value for range" check. */ +// attr bool leaf = false; /* see also: range.c:range_init() */ { val = rb_range_new(low, high, (int)flag); } @@ -618,6 +646,7 @@ defined (rb_num_t op_type, VALUE obj, VALUE needstr) (VALUE v) (VALUE val) +// attr bool leaf = leafness_of_defined(op_type); { val = vm_defined(ec, GET_CFP(), op_type, obj, needstr, v); } @@ -634,6 +663,7 @@ checkmatch (rb_num_t flag) (VALUE target, VALUE pattern) (VALUE result) +// attr bool leaf = leafness_of_checkmatch(flag); { result = vm_check_match(ec, target, pattern, flag); } @@ -726,6 +756,7 @@ opt_str_freeze (VALUE str) () (VALUE val) +// attr bool leaf = BASIC_OP_UNREDEFINED_P(BOP_FREEZE, STRING_REDEFINED_OP_FLAG); { val = vm_opt_str_freeze(str, BOP_FREEZE, idFreeze); } @@ -735,6 +766,7 @@ opt_str_uminus (VALUE str) () (VALUE val) +// attr bool leaf = BASIC_OP_UNREDEFINED_P(BOP_UMINUS, STRING_REDEFINED_OP_FLAG); { val = vm_opt_str_freeze(str, BOP_UMINUS, idUMinus); } @@ -744,6 +776,11 @@ opt_newarray_max (rb_num_t num) (...) (VALUE val) +/* This instruction typically has no funcalls. But it compares array + * contents each other by nature. That part could call methods when + * necessary. No way to detect such method calls beforehand. We + * cannot but mark it being not leaf. */ +// attr bool leaf = false; /* has rb_funcall() */ // attr rb_snum_t sp_inc = 1 - num; { val = vm_opt_newarray_max(num, STACK_ADDR_FROM_TOP(num)); @@ -754,6 +791,8 @@ opt_newarray_min (rb_num_t num) (...) (VALUE val) +/* Same discussion as opt_newarray_max. */ +// attr bool leaf = false; /* has rb_funcall() */ // attr rb_snum_t sp_inc = 1 - num; { val = vm_opt_newarray_min(num, STACK_ADDR_FROM_TOP(num)); @@ -765,6 +804,7 @@ opt_send_without_block (CALL_INFO ci, CALL_CACHE cc) (...) (VALUE val) +// attr bool leaf = false; /* Of course it isn't. */ // attr bool handles_sp = true; // attr rb_snum_t sp_inc = -ci->orig_argc; { @@ -797,6 +837,7 @@ invokeblock (CALL_INFO ci) (...) (VALUE val) +// attr bool leaf = false; /* Of course it isn't. */ // attr bool handles_sp = true; // attr rb_snum_t sp_inc = 1 - ci->orig_argc; { @@ -824,6 +865,10 @@ leave () (VALUE val) (VALUE val) +/* This is super surprising but when leaving from a frame, we check + * for interrupts. If any, that should be executed on top of the + * current execution context. This is a method call. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ // attr bool handles_sp = true; { if (OPT_CHECKED_RUN) { @@ -858,6 +903,8 @@ throw (rb_num_t throw_state) (VALUE throwobj) (VALUE val) +/* Same discussion as leave. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ { RUBY_VM_CHECK_INTS(ec); val = vm_throw(ec, GET_CFP(), throw_state, throwobj); @@ -875,6 +922,8 @@ jump (OFFSET dst) () () +/* Same discussion as leave. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ { RUBY_VM_CHECK_INTS(ec); JUMP(dst); @@ -886,6 +935,8 @@ branchif (OFFSET dst) (VALUE val) () +/* Same discussion as jump. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ { if (RTEST(val)) { RUBY_VM_CHECK_INTS(ec); @@ -899,6 +950,8 @@ branchunless (OFFSET dst) (VALUE val) () +/* Same discussion as jump. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ { if (!RTEST(val)) { RUBY_VM_CHECK_INTS(ec); @@ -912,6 +965,8 @@ branchnil (OFFSET dst) (VALUE val) () +/* Same discussion as jump. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ { if (NIL_P(val)) { RUBY_VM_CHECK_INTS(ec); @@ -965,6 +1020,10 @@ opt_case_dispatch (CDHASH hash, OFFSET else_offset) (..., VALUE key) () +/* Case dispatch involves hash lookup, which inevitably compares + * several objects each other. The same discussion to + * opt_newarray_max also goes here. */ +// attr bool leaf = false; /* has rb_any_cmp() */ // attr rb_snum_t sp_inc = -1; { OFFSET dst = vm_case_dispatch(hash, else_offset, key); @@ -982,6 +1041,9 @@ opt_plus (CALL_INFO ci, CALL_CACHE cc) (VALUE recv, VALUE obj) (VALUE val) +/* Array + anything can be handled inside of opt_plus, and that + * anything is converted into array using #to_ary. */ +// attr bool leaf = false; /* has rb_to_array_type() */ { val = vm_opt_plus(recv, obj); @@ -1067,6 +1129,10 @@ opt_eq (CALL_INFO ci, CALL_CACHE cc) (VALUE recv, VALUE obj) (VALUE val) +/* This instruction can compare a string with non-string. This + * (somewhat) coerces the non-string into a string, via a method + * call. */ +// attr bool leaf = false; /* has rb_str_equal() */ { val = opt_eq_func(recv, obj, ci, cc); @@ -1084,6 +1150,8 @@ opt_neq (CALL_INFO ci_eq, CALL_CACHE cc_eq, CALL_INFO ci, CALL_CACHE cc) (VALUE recv, VALUE obj) (VALUE val) +/* Same discussion as opt_eq. */ +// attr bool leaf = false; /* has rb_str_equal() */ { val = vm_opt_neq(ci, cc, ci_eq, cc_eq, recv, obj); @@ -1186,6 +1254,11 @@ opt_aref (CALL_INFO ci, CALL_CACHE cc) (VALUE recv, VALUE obj) (VALUE val) +/* This is complicated. In case of hash, vm_opt_aref() resorts to + * rb_hash_aref(). If `recv` has no `obj`, this function then yields + * default_proc. This is a method call. So opt_aref is + * (surprisingly) not leaf. */ +// attr bool leaf = false; /* has rb_funcall() */ /* calls #yield */ { val = vm_opt_aref(recv, obj); @@ -1203,6 +1276,9 @@ opt_aset (CALL_INFO ci, CALL_CACHE cc) (VALUE recv, VALUE obj, VALUE set) (VALUE val) +/* This is another story than opt_aref. When vm_opt_aset() resorts + * to rb_hash_aset(), which should call #hash for `obj`. */ +// attr bool leaf = false; /* has rb_funcall() */ /* calls #hash */ { val = vm_opt_aset(recv, obj, set); @@ -1220,6 +1296,8 @@ opt_aset_with (VALUE key, CALL_INFO ci, CALL_CACHE cc) (VALUE recv, VALUE val) (VALUE val) +/* Same discussion as opt_aset. */ +// attr bool leaf = false; /* has rb_funcall() */ /* calls #hash */ { VALUE tmp = vm_opt_aset_with(recv, key, val); @@ -1242,6 +1320,8 @@ opt_aref_with (VALUE key, CALL_INFO ci, CALL_CACHE cc) (VALUE recv) (VALUE val) +/* Same discussion as opt_aref. */ +// attr bool leaf = false; /* has rb_funcall() */ /* calls #yield */ { val = vm_opt_aref_with(recv, key); @@ -1345,6 +1425,7 @@ opt_regexpmatch1 (VALUE recv) (VALUE obj) (VALUE val) +// attr bool leaf = BASIC_OP_UNREDEFINED_P(BOP_MATCH, REGEXP_REDEFINED_OP_FLAG); { val = vm_opt_regexpmatch1(recv, obj); } @@ -1372,6 +1453,7 @@ opt_call_c_function (rb_insn_func_t funcptr) () () +// attr bool leaf = false; /* anything can happen inside */ // attr bool handles_sp = true; { reg_cfp = (funcptr)(ec, reg_cfp); diff --git a/internal.h b/internal.h index 8cbe16aaff..bfbd90bc5e 100644 --- a/internal.h +++ b/internal.h @@ -1901,6 +1901,9 @@ VALUE rb_ivar_lookup(VALUE obj, ID id, VALUE undef); void rb_autoload_str(VALUE mod, ID id, VALUE file); void rb_deprecate_constant(VALUE mod, const char *name); NORETURN(VALUE rb_mod_const_missing(VALUE,VALUE)); +rb_gvar_getter_t *rb_gvar_getter_function_of(const struct rb_global_entry *); +rb_gvar_setter_t *rb_gvar_setter_function_of(const struct rb_global_entry *); +bool rb_gvar_is_traced(const struct rb_global_entry *); /* vm_insnhelper.h */ rb_serial_t rb_next_class_serial(void); @@ -1946,6 +1949,7 @@ VALUE rb_lambda_call(VALUE obj, ID mid, int argc, const VALUE *argv, /* vm_insnhelper.c */ VALUE rb_equal_opt(VALUE obj1, VALUE obj2); VALUE rb_eql_opt(VALUE obj1, VALUE obj2); +void Init_vm_stack_canary(void); /* vm_method.c */ void Init_eval_method(void); @@ -2099,6 +2103,9 @@ VALUE rb_imemo_new_debug(enum imemo_type type, VALUE v1, VALUE v2, VALUE v3, VAL VALUE rb_imemo_new(enum imemo_type type, VALUE v1, VALUE v2, VALUE v3, VALUE v0); #endif +/* random.c */ +int fill_random_bytes(void *, size_t, int); + RUBY_SYMBOL_EXPORT_END #define RUBY_DTRACE_CREATE_HOOK(name, arg) \ diff --git a/random.c b/random.c index 14588c75d7..d8cfd38016 100644 --- a/random.c +++ b/random.c @@ -573,7 +573,7 @@ fill_random_bytes_syscall(void *seed, size_t size, int need_secure) # define fill_random_bytes_syscall(seed, size, need_secure) -1 #endif -static int +int fill_random_bytes(void *seed, size_t size, int need_secure) { int ret = fill_random_bytes_syscall(seed, size, need_secure); diff --git a/tool/ruby_vm/models/bare_instructions.rb b/tool/ruby_vm/models/bare_instructions.rb index d2ff1341a0..5c27af3a38 100755 --- a/tool/ruby_vm/models/bare_instructions.rb +++ b/tool/ruby_vm/models/bare_instructions.rb @@ -105,6 +105,10 @@ class RubyVM::BareInstructions /\b(false|0)\b/ !~ @attrs['handles_sp'].expr.expr end + def complicated_return_values? + @sig[:ret].any? {|i| i == '...' } + end + def inspect sprintf "#<%s %s@%s:%d>", self.class.name, @name, @loc[0], @loc[1] end @@ -130,6 +134,10 @@ class RubyVM::BareInstructions generate_attribute 'rb_num_t', 'width', width generate_attribute 'rb_snum_t', 'sp_inc', rets.size - pops.size generate_attribute 'bool', 'handles_sp', false + generate_attribute 'bool', 'leaf', opes.all? {|o| + # Insn with ISEQ should yield it; can never be a leaf. + o[:type] != 'ISEQ' + } end def typesplit a diff --git a/tool/ruby_vm/views/_insn_entry.erb b/tool/ruby_vm/views/_insn_entry.erb index e3e3ea42ea..90a489f624 100644 --- a/tool/ruby_vm/views/_insn_entry.erb +++ b/tool/ruby_vm/views/_insn_entry.erb @@ -13,6 +13,12 @@ INSN_ENTRY(<%= insn.name %>) %# NAME_OF_CURRENT_INSN is used in vm_exec.h # define NAME_OF_CURRENT_INSN <%= insn.name %> # define INSN_ATTR(x) <%= insn.call_attribute(' ## x ## ') %> +% unless insn.complicated_return_values? +# if VM_CHECK_MODE > 0 + bool leaf; + VALUE *canary; +# endif +% end % unless insn.declarations.empty? <%= insn.declarations.join(";\n ") %>; @@ -32,6 +38,14 @@ INSN_ENTRY(<%= insn.name %>) ADD_PC(INSN_ATTR(width)); % if insn.handles_sp? POPN(INSN_ATTR(popn)); +% end +% unless insn.complicated_return_values? +#if VM_CHECK_MODE > 0 + if ((leaf = INSN_ATTR(leaf))) { + canary = STACK_ADDR_FROM_TOP(-1); + *canary = vm_stack_canary; + } +#endif % end COLLECT_USAGE_INSN(INSN_ATTR(bin)); % insn.opes.each_with_index do |ope, i| @@ -39,6 +53,13 @@ INSN_ENTRY(<%= insn.name %>) % end <%= render_c_expr insn.expr -%> CHECK_VM_STACK_OVERFLOW_FOR_INSN(VM_REG_CFP, INSN_ATTR(retn)); +% unless insn.complicated_return_values? +#if VM_CHECK_MODE > 0 + if (leaf && (*canary != vm_stack_canary)) { + vm_canary_is_found_dead(INSN_ATTR(bin), *canary); + } +#endif +% end % if insn.handles_sp? % insn.rets.reverse_each do |ret| PUSH(<%= insn.cast_to_VALUE ret %>); diff --git a/tool/ruby_vm/views/_leaf_helpers.erb b/tool/ruby_vm/views/_leaf_helpers.erb new file mode 100644 index 0000000000..00c2e3882a --- /dev/null +++ b/tool/ruby_vm/views/_leaf_helpers.erb @@ -0,0 +1,105 @@ +%# -*- mode:c; style:ruby; coding: utf-8; indent-tabs-mode: nil -*- +%# Copyright (c) 2018 Urabe, Shyouhei. All rights reserved. +%# +%# This file is a part of the programming language Ruby. Permission is hereby +%# granted, to either redistribute and/or modify this file, provided that the +%# conditions mentioned in the file COPYING are met. Consult the file for +%# details. +%; + +static bool +leafness_of_getglobal(VALUE gentry) +{ + const struct rb_global_entry *e = (void *)gentry; + + if (UNLIKELY(rb_gvar_is_traced(e))) { + return false; + } + else { + /* We cannot write this function using a switch() because a + * case label cannot be a function pointer. */ + static rb_gvar_getter_t *const whitelist[] = { + rb_gvar_val_getter, + rb_gvar_var_getter, + rb_gvar_undef_getter, + }; + rb_gvar_getter_t *f = rb_gvar_getter_function_of(e); + int i; + + for (i = 0; i < numberof(whitelist); i++) { + if (f == whitelist[i]) { + return true; + } + } + return false; + } +} + +static bool +leafness_of_setglobal(VALUE gentry) +{ + const struct rb_global_entry *e = (void *)gentry; + + if (UNLIKELY(rb_gvar_is_traced(e))) { + return false; + } + else { + /* We cannot write this function using a switch() because a + * case label cannot be a function pointer. */ + static rb_gvar_setter_t *const whitelist[] = { + rb_gvar_val_setter, + rb_gvar_readonly_setter, + rb_gvar_var_setter, + rb_gvar_undef_setter, + }; + rb_gvar_setter_t *f = rb_gvar_setter_function_of(e); + int i; + + for (i = 0; i < numberof(whitelist); i++) { + if (f == whitelist[i]) { + return true; + } + } + return false; + } +} + +#include "iseq.h" + +static bool +leafness_of_defined(rb_num_t op_type) +{ + /* see also: vm_insnhelper.c:vm_defined() */ + switch (op_type) { + case DEFINED_IVAR: + case DEFINED_IVAR2: + case DEFINED_GVAR: + case DEFINED_CVAR: + case DEFINED_YIELD: + case DEFINED_REF: + case DEFINED_ZSUPER: + return false; + case DEFINED_CONST: + /* has rb_autoload_load(); */ + return false; + case DEFINED_FUNC: + case DEFINED_METHOD: + /* calls #respond_to_missing? */ + return false; + default: + rb_bug("unknown operand %ld: blame @shyouhei.", op_type); + } +} + +static bool +leafness_of_checkmatch(rb_num_t flag) +{ + /* see also: vm_insnhelper.c:check_match() */ + if (flag == VM_CHECKMATCH_TYPE_WHEN) { + return true; + } + else { + /* has rb_funcallv() */ + return false; + } +} diff --git a/tool/ruby_vm/views/insns_info.inc.erb b/tool/ruby_vm/views/insns_info.inc.erb index e08a15e5ef..67fcebfc42 100644 --- a/tool/ruby_vm/views/insns_info.inc.erb +++ b/tool/ruby_vm/views/insns_info.inc.erb @@ -15,5 +15,6 @@ <%= render 'insn_name_info' %> <%= render 'insn_len_info' %> <%= render 'insn_operand_info' %> +<%= render 'leaf_helpers' %> <%= render 'attributes' %> <%= render 'insn_stack_increase' %> diff --git a/variable.c b/variable.c index 32e71fccb4..1c23173d6c 100644 --- a/variable.c +++ b/variable.c @@ -867,6 +867,24 @@ rb_gvar_defined(struct rb_global_entry *entry) return Qtrue; } +rb_gvar_getter_t * +rb_gvar_getter_function_of(const struct rb_global_entry *entry) +{ + return entry->var->getter; +} + +rb_gvar_setter_t * +rb_gvar_setter_function_of(const struct rb_global_entry *entry) +{ + return entry->var->setter; +} + +bool +rb_gvar_is_traced(const struct rb_global_entry *entry) +{ + return !!entry->var->trace; +} + static enum rb_id_table_iterator_result gvar_i(ID key, VALUE val, void *a) { diff --git a/vm_insnhelper.c b/vm_insnhelper.c index f82b0e075b..702e22291b 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -3900,3 +3900,38 @@ vm_trace(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE *p } } } + +#if VM_CHECK_MODE > 0 +static NORETURN( NOINLINE( +#if GCC_VERSION_SINCE(4, 3, 0) +__attribute__((__cold__)) +#endif +void vm_canary_is_found_dead(enum ruby_vminsn_type i, VALUE c))); +static VALUE vm_stack_canary; + +void +Init_vm_stack_canary(void) +{ + /* This has to be called _after_ our PRNG is properly set up. */ + int n = fill_random_bytes(&vm_stack_canary, sizeof vm_stack_canary, false); + + VM_ASSERT(n == 0); +} + +static void +vm_canary_is_found_dead(enum ruby_vminsn_type i, VALUE c) +{ + /* Because a method has already been called, why not call + * another one. */ + const char *insn = rb_insns_name(i); + VALUE inspection = rb_inspect(c); + const char *str = StringValueCStr(inspection); + VALUE message = rb_sprintf("dead canary found at %s: %s", insn, str); + const char *msg = StringValueCStr(message); + + rb_bug(msg); +} + +#elif !defined(MJIT_HEADER) +void Init_vm_stack_canary(void) { /* nothing to do */ } +#endif