1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

add new instruction attribute called leaf

An instruction is leaf if it has no rb_funcall inside.  In order to
check this property, we introduce stack canary which is a random
number collected at runtime.  Stack top is always filled with this
number and checked for stack smashing operations, when VM_CHECK_MODE.
[GH-1947]


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@64677 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
shyouhei 2018-09-11 09:48:58 +00:00
parent ecda2d23d5
commit c2bfb4e93c
10 changed files with 279 additions and 1 deletions

View file

@ -61,6 +61,7 @@ rb_call_inits(void)
CALL(Complex);
CALL(version);
CALL(vm_trace);
CALL(vm_stack_canary);
CALL(ast);
}
#undef CALL

View file

@ -45,6 +45,9 @@
* handles_sp: If it is true, VM deals with sp in the insn.
* leaf: indicates that the instruction is "leaf" i.e. it does
not introduce new stack frame on top of it. Default true.
- Attributes can access operands, but not stack (push/pop) variables.
- An instruction's body is a pure C block, copied verbatimly into
@ -203,6 +206,8 @@ getinstancevariable
(ID id, IC ic)
()
(VALUE val)
/* "instance variable not initialized" warning can be hooked. */
// attr bool leaf = false; /* has rb_warning() */
{
val = vm_getinstancevariable(GET_SELF(), id, ic);
}
@ -223,6 +228,8 @@ getclassvariable
(ID id)
()
(VALUE val)
/* "class variable access from toplevel" warning can be hooked. */
// attr bool leaf = false; /* has rb_warning() */
{
val = rb_cvar_get(vm_get_cvar_base(rb_vm_get_cref(GET_EP()), GET_CFP()), id);
}
@ -233,6 +240,8 @@ setclassvariable
(ID id)
(VALUE val)
()
/* "class variable access from toplevel" warning can be hooked. */
// attr bool leaf = false; /* has rb_warning() */
{
vm_ensure_not_refinement_module(GET_SELF());
rb_cvar_set(vm_get_cvar_base(rb_vm_get_cref(GET_EP()), GET_CFP()), id, val);
@ -247,6 +256,8 @@ getconstant
(ID id)
(VALUE klass)
(VALUE val)
/* getconstant can kick autoload */
// attr bool leaf = false; /* has rb_autoload_load() */
{
val = vm_get_ev_const(ec, klass, id, 0);
}
@ -258,6 +269,11 @@ setconstant
(ID id)
(VALUE val, VALUE cbase)
()
/* Assigning an object to a constant is basically a leaf operation.
* The problem is, assigning a Module instance to a constant _names_
* that module. Naming involves string manipulations, which are
* method calls. */
// attr bool leaf = false; /* has StringValue() */
{
vm_check_if_namespace(cbase);
vm_ensure_not_refinement_module(GET_SELF());
@ -270,6 +286,7 @@ getglobal
(GENTRY entry)
()
(VALUE val)
// attr bool leaf = leafness_of_getglobal(entry);
{
val = GET_GLOBAL((VALUE)entry);
}
@ -280,6 +297,7 @@ setglobal
(GENTRY entry)
(VALUE val)
()
// attr bool leaf = leafness_of_setglobal(entry);
{
SET_GLOBAL((VALUE)entry, val);
}
@ -339,6 +357,7 @@ putiseq
(ISEQ iseq)
()
(VALUE ret)
// attr bool leaf = true; /* yes it is */
{
ret = (VALUE)iseq;
}
@ -392,6 +411,9 @@ toregexp
(rb_num_t opt, rb_num_t cnt)
(...)
(VALUE val)
/* This instruction has StringValue(), which is a method call. But it
* seems that path is never covered. */
// attr bool leaf = true; /* yes it is */
// attr rb_snum_t sp_inc = 1 - cnt;
{
const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, STACK_ADDR_FROM_TOP(cnt));
@ -444,6 +466,7 @@ expandarray
(rb_num_t num, rb_num_t flag)
(..., VALUE ary)
(...)
// attr bool leaf = false; /* has rb_check_array_type() */
// attr rb_snum_t sp_inc = num - 1 + (flag & 1 ? 1 : 0);
{
vm_expandarray(GET_SP(), ary, num, (int)flag);
@ -455,6 +478,7 @@ concatarray
()
(VALUE ary1, VALUE ary2)
(VALUE ary)
// attr bool leaf = false; /* has rb_check_array_type() */
{
ary = vm_concat_array(ary1, ary2);
}
@ -465,6 +489,7 @@ splatarray
(VALUE flag)
(VALUE ary)
(VALUE obj)
// attr bool leaf = false; /* has rb_check_array_type() */
{
obj = vm_splat_array(flag, ary);
}
@ -475,6 +500,7 @@ newhash
(rb_num_t num)
(...)
(VALUE val)
// attr bool leaf = false; /* has rb_hash_key_str() */
// attr rb_snum_t sp_inc = 1 - num;
{
RUBY_DTRACE_CREATE_HOOK(HASH, num);
@ -492,6 +518,8 @@ newrange
(rb_num_t flag)
(VALUE low, VALUE high)
(VALUE val)
/* rb_range_new() exercises "bad value for range" check. */
// attr bool leaf = false; /* see also: range.c:range_init() */
{
val = rb_range_new(low, high, (int)flag);
}
@ -618,6 +646,7 @@ defined
(rb_num_t op_type, VALUE obj, VALUE needstr)
(VALUE v)
(VALUE val)
// attr bool leaf = leafness_of_defined(op_type);
{
val = vm_defined(ec, GET_CFP(), op_type, obj, needstr, v);
}
@ -634,6 +663,7 @@ checkmatch
(rb_num_t flag)
(VALUE target, VALUE pattern)
(VALUE result)
// attr bool leaf = leafness_of_checkmatch(flag);
{
result = vm_check_match(ec, target, pattern, flag);
}
@ -726,6 +756,7 @@ opt_str_freeze
(VALUE str)
()
(VALUE val)
// attr bool leaf = BASIC_OP_UNREDEFINED_P(BOP_FREEZE, STRING_REDEFINED_OP_FLAG);
{
val = vm_opt_str_freeze(str, BOP_FREEZE, idFreeze);
}
@ -735,6 +766,7 @@ opt_str_uminus
(VALUE str)
()
(VALUE val)
// attr bool leaf = BASIC_OP_UNREDEFINED_P(BOP_UMINUS, STRING_REDEFINED_OP_FLAG);
{
val = vm_opt_str_freeze(str, BOP_UMINUS, idUMinus);
}
@ -744,6 +776,11 @@ opt_newarray_max
(rb_num_t num)
(...)
(VALUE val)
/* This instruction typically has no funcalls. But it compares array
* contents each other by nature. That part could call methods when
* necessary. No way to detect such method calls beforehand. We
* cannot but mark it being not leaf. */
// attr bool leaf = false; /* has rb_funcall() */
// attr rb_snum_t sp_inc = 1 - num;
{
val = vm_opt_newarray_max(num, STACK_ADDR_FROM_TOP(num));
@ -754,6 +791,8 @@ opt_newarray_min
(rb_num_t num)
(...)
(VALUE val)
/* Same discussion as opt_newarray_max. */
// attr bool leaf = false; /* has rb_funcall() */
// attr rb_snum_t sp_inc = 1 - num;
{
val = vm_opt_newarray_min(num, STACK_ADDR_FROM_TOP(num));
@ -765,6 +804,7 @@ opt_send_without_block
(CALL_INFO ci, CALL_CACHE cc)
(...)
(VALUE val)
// attr bool leaf = false; /* Of course it isn't. */
// attr bool handles_sp = true;
// attr rb_snum_t sp_inc = -ci->orig_argc;
{
@ -797,6 +837,7 @@ invokeblock
(CALL_INFO ci)
(...)
(VALUE val)
// attr bool leaf = false; /* Of course it isn't. */
// attr bool handles_sp = true;
// attr rb_snum_t sp_inc = 1 - ci->orig_argc;
{
@ -824,6 +865,10 @@ leave
()
(VALUE val)
(VALUE val)
/* This is super surprising but when leaving from a frame, we check
* for interrupts. If any, that should be executed on top of the
* current execution context. This is a method call. */
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
// attr bool handles_sp = true;
{
if (OPT_CHECKED_RUN) {
@ -858,6 +903,8 @@ throw
(rb_num_t throw_state)
(VALUE throwobj)
(VALUE val)
/* Same discussion as leave. */
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
{
RUBY_VM_CHECK_INTS(ec);
val = vm_throw(ec, GET_CFP(), throw_state, throwobj);
@ -875,6 +922,8 @@ jump
(OFFSET dst)
()
()
/* Same discussion as leave. */
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
{
RUBY_VM_CHECK_INTS(ec);
JUMP(dst);
@ -886,6 +935,8 @@ branchif
(OFFSET dst)
(VALUE val)
()
/* Same discussion as jump. */
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
{
if (RTEST(val)) {
RUBY_VM_CHECK_INTS(ec);
@ -899,6 +950,8 @@ branchunless
(OFFSET dst)
(VALUE val)
()
/* Same discussion as jump. */
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
{
if (!RTEST(val)) {
RUBY_VM_CHECK_INTS(ec);
@ -912,6 +965,8 @@ branchnil
(OFFSET dst)
(VALUE val)
()
/* Same discussion as jump. */
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
{
if (NIL_P(val)) {
RUBY_VM_CHECK_INTS(ec);
@ -965,6 +1020,10 @@ opt_case_dispatch
(CDHASH hash, OFFSET else_offset)
(..., VALUE key)
()
/* Case dispatch involves hash lookup, which inevitably compares
* several objects each other. The same discussion to
* opt_newarray_max also goes here. */
// attr bool leaf = false; /* has rb_any_cmp() */
// attr rb_snum_t sp_inc = -1;
{
OFFSET dst = vm_case_dispatch(hash, else_offset, key);
@ -982,6 +1041,9 @@ opt_plus
(CALL_INFO ci, CALL_CACHE cc)
(VALUE recv, VALUE obj)
(VALUE val)
/* Array + anything can be handled inside of opt_plus, and that
* anything is converted into array using #to_ary. */
// attr bool leaf = false; /* has rb_to_array_type() */
{
val = vm_opt_plus(recv, obj);
@ -1067,6 +1129,10 @@ opt_eq
(CALL_INFO ci, CALL_CACHE cc)
(VALUE recv, VALUE obj)
(VALUE val)
/* This instruction can compare a string with non-string. This
* (somewhat) coerces the non-string into a string, via a method
* call. */
// attr bool leaf = false; /* has rb_str_equal() */
{
val = opt_eq_func(recv, obj, ci, cc);
@ -1084,6 +1150,8 @@ opt_neq
(CALL_INFO ci_eq, CALL_CACHE cc_eq, CALL_INFO ci, CALL_CACHE cc)
(VALUE recv, VALUE obj)
(VALUE val)
/* Same discussion as opt_eq. */
// attr bool leaf = false; /* has rb_str_equal() */
{
val = vm_opt_neq(ci, cc, ci_eq, cc_eq, recv, obj);
@ -1186,6 +1254,11 @@ opt_aref
(CALL_INFO ci, CALL_CACHE cc)
(VALUE recv, VALUE obj)
(VALUE val)
/* This is complicated. In case of hash, vm_opt_aref() resorts to
* rb_hash_aref(). If `recv` has no `obj`, this function then yields
* default_proc. This is a method call. So opt_aref is
* (surprisingly) not leaf. */
// attr bool leaf = false; /* has rb_funcall() */ /* calls #yield */
{
val = vm_opt_aref(recv, obj);
@ -1203,6 +1276,9 @@ opt_aset
(CALL_INFO ci, CALL_CACHE cc)
(VALUE recv, VALUE obj, VALUE set)
(VALUE val)
/* This is another story than opt_aref. When vm_opt_aset() resorts
* to rb_hash_aset(), which should call #hash for `obj`. */
// attr bool leaf = false; /* has rb_funcall() */ /* calls #hash */
{
val = vm_opt_aset(recv, obj, set);
@ -1220,6 +1296,8 @@ opt_aset_with
(VALUE key, CALL_INFO ci, CALL_CACHE cc)
(VALUE recv, VALUE val)
(VALUE val)
/* Same discussion as opt_aset. */
// attr bool leaf = false; /* has rb_funcall() */ /* calls #hash */
{
VALUE tmp = vm_opt_aset_with(recv, key, val);
@ -1242,6 +1320,8 @@ opt_aref_with
(VALUE key, CALL_INFO ci, CALL_CACHE cc)
(VALUE recv)
(VALUE val)
/* Same discussion as opt_aref. */
// attr bool leaf = false; /* has rb_funcall() */ /* calls #yield */
{
val = vm_opt_aref_with(recv, key);
@ -1345,6 +1425,7 @@ opt_regexpmatch1
(VALUE recv)
(VALUE obj)
(VALUE val)
// attr bool leaf = BASIC_OP_UNREDEFINED_P(BOP_MATCH, REGEXP_REDEFINED_OP_FLAG);
{
val = vm_opt_regexpmatch1(recv, obj);
}
@ -1372,6 +1453,7 @@ opt_call_c_function
(rb_insn_func_t funcptr)
()
()
// attr bool leaf = false; /* anything can happen inside */
// attr bool handles_sp = true;
{
reg_cfp = (funcptr)(ec, reg_cfp);

View file

@ -1901,6 +1901,9 @@ VALUE rb_ivar_lookup(VALUE obj, ID id, VALUE undef);
void rb_autoload_str(VALUE mod, ID id, VALUE file);
void rb_deprecate_constant(VALUE mod, const char *name);
NORETURN(VALUE rb_mod_const_missing(VALUE,VALUE));
rb_gvar_getter_t *rb_gvar_getter_function_of(const struct rb_global_entry *);
rb_gvar_setter_t *rb_gvar_setter_function_of(const struct rb_global_entry *);
bool rb_gvar_is_traced(const struct rb_global_entry *);
/* vm_insnhelper.h */
rb_serial_t rb_next_class_serial(void);
@ -1946,6 +1949,7 @@ VALUE rb_lambda_call(VALUE obj, ID mid, int argc, const VALUE *argv,
/* vm_insnhelper.c */
VALUE rb_equal_opt(VALUE obj1, VALUE obj2);
VALUE rb_eql_opt(VALUE obj1, VALUE obj2);
void Init_vm_stack_canary(void);
/* vm_method.c */
void Init_eval_method(void);
@ -2099,6 +2103,9 @@ VALUE rb_imemo_new_debug(enum imemo_type type, VALUE v1, VALUE v2, VALUE v3, VAL
VALUE rb_imemo_new(enum imemo_type type, VALUE v1, VALUE v2, VALUE v3, VALUE v0);
#endif
/* random.c */
int fill_random_bytes(void *, size_t, int);
RUBY_SYMBOL_EXPORT_END
#define RUBY_DTRACE_CREATE_HOOK(name, arg) \

View file

@ -573,7 +573,7 @@ fill_random_bytes_syscall(void *seed, size_t size, int need_secure)
# define fill_random_bytes_syscall(seed, size, need_secure) -1
#endif
static int
int
fill_random_bytes(void *seed, size_t size, int need_secure)
{
int ret = fill_random_bytes_syscall(seed, size, need_secure);

View file

@ -105,6 +105,10 @@ class RubyVM::BareInstructions
/\b(false|0)\b/ !~ @attrs['handles_sp'].expr.expr
end
def complicated_return_values?
@sig[:ret].any? {|i| i == '...' }
end
def inspect
sprintf "#<%s %s@%s:%d>", self.class.name, @name, @loc[0], @loc[1]
end
@ -130,6 +134,10 @@ class RubyVM::BareInstructions
generate_attribute 'rb_num_t', 'width', width
generate_attribute 'rb_snum_t', 'sp_inc', rets.size - pops.size
generate_attribute 'bool', 'handles_sp', false
generate_attribute 'bool', 'leaf', opes.all? {|o|
# Insn with ISEQ should yield it; can never be a leaf.
o[:type] != 'ISEQ'
}
end
def typesplit a

View file

@ -13,6 +13,12 @@ INSN_ENTRY(<%= insn.name %>)
%# NAME_OF_CURRENT_INSN is used in vm_exec.h
# define NAME_OF_CURRENT_INSN <%= insn.name %>
# define INSN_ATTR(x) <%= insn.call_attribute(' ## x ## ') %>
% unless insn.complicated_return_values?
# if VM_CHECK_MODE > 0
bool leaf;
VALUE *canary;
# endif
% end
% unless insn.declarations.empty?
<%= insn.declarations.join(";\n ") %>;
@ -32,6 +38,14 @@ INSN_ENTRY(<%= insn.name %>)
ADD_PC(INSN_ATTR(width));
% if insn.handles_sp?
POPN(INSN_ATTR(popn));
% end
% unless insn.complicated_return_values?
#if VM_CHECK_MODE > 0
if ((leaf = INSN_ATTR(leaf))) {
canary = STACK_ADDR_FROM_TOP(-1);
*canary = vm_stack_canary;
}
#endif
% end
COLLECT_USAGE_INSN(INSN_ATTR(bin));
% insn.opes.each_with_index do |ope, i|
@ -39,6 +53,13 @@ INSN_ENTRY(<%= insn.name %>)
% end
<%= render_c_expr insn.expr -%>
CHECK_VM_STACK_OVERFLOW_FOR_INSN(VM_REG_CFP, INSN_ATTR(retn));
% unless insn.complicated_return_values?
#if VM_CHECK_MODE > 0
if (leaf && (*canary != vm_stack_canary)) {
vm_canary_is_found_dead(INSN_ATTR(bin), *canary);
}
#endif
% end
% if insn.handles_sp?
% insn.rets.reverse_each do |ret|
PUSH(<%= insn.cast_to_VALUE ret %>);

View file

@ -0,0 +1,105 @@
%# -*- mode:c; style:ruby; coding: utf-8; indent-tabs-mode: nil -*-
%# Copyright (c) 2018 Urabe, Shyouhei. All rights reserved.
%#
%# This file is a part of the programming language Ruby. Permission is hereby
%# granted, to either redistribute and/or modify this file, provided that the
%# conditions mentioned in the file COPYING are met. Consult the file for
%# details.
%;
static bool
leafness_of_getglobal(VALUE gentry)
{
const struct rb_global_entry *e = (void *)gentry;
if (UNLIKELY(rb_gvar_is_traced(e))) {
return false;
}
else {
/* We cannot write this function using a switch() because a
* case label cannot be a function pointer. */
static rb_gvar_getter_t *const whitelist[] = {
rb_gvar_val_getter,
rb_gvar_var_getter,
rb_gvar_undef_getter,
};
rb_gvar_getter_t *f = rb_gvar_getter_function_of(e);
int i;
for (i = 0; i < numberof(whitelist); i++) {
if (f == whitelist[i]) {
return true;
}
}
return false;
}
}
static bool
leafness_of_setglobal(VALUE gentry)
{
const struct rb_global_entry *e = (void *)gentry;
if (UNLIKELY(rb_gvar_is_traced(e))) {
return false;
}
else {
/* We cannot write this function using a switch() because a
* case label cannot be a function pointer. */
static rb_gvar_setter_t *const whitelist[] = {
rb_gvar_val_setter,
rb_gvar_readonly_setter,
rb_gvar_var_setter,
rb_gvar_undef_setter,
};
rb_gvar_setter_t *f = rb_gvar_setter_function_of(e);
int i;
for (i = 0; i < numberof(whitelist); i++) {
if (f == whitelist[i]) {
return true;
}
}
return false;
}
}
#include "iseq.h"
static bool
leafness_of_defined(rb_num_t op_type)
{
/* see also: vm_insnhelper.c:vm_defined() */
switch (op_type) {
case DEFINED_IVAR:
case DEFINED_IVAR2:
case DEFINED_GVAR:
case DEFINED_CVAR:
case DEFINED_YIELD:
case DEFINED_REF:
case DEFINED_ZSUPER:
return false;
case DEFINED_CONST:
/* has rb_autoload_load(); */
return false;
case DEFINED_FUNC:
case DEFINED_METHOD:
/* calls #respond_to_missing? */
return false;
default:
rb_bug("unknown operand %ld: blame @shyouhei.", op_type);
}
}
static bool
leafness_of_checkmatch(rb_num_t flag)
{
/* see also: vm_insnhelper.c:check_match() */
if (flag == VM_CHECKMATCH_TYPE_WHEN) {
return true;
}
else {
/* has rb_funcallv() */
return false;
}
}

View file

@ -15,5 +15,6 @@
<%= render 'insn_name_info' %>
<%= render 'insn_len_info' %>
<%= render 'insn_operand_info' %>
<%= render 'leaf_helpers' %>
<%= render 'attributes' %>
<%= render 'insn_stack_increase' %>

View file

@ -867,6 +867,24 @@ rb_gvar_defined(struct rb_global_entry *entry)
return Qtrue;
}
rb_gvar_getter_t *
rb_gvar_getter_function_of(const struct rb_global_entry *entry)
{
return entry->var->getter;
}
rb_gvar_setter_t *
rb_gvar_setter_function_of(const struct rb_global_entry *entry)
{
return entry->var->setter;
}
bool
rb_gvar_is_traced(const struct rb_global_entry *entry)
{
return !!entry->var->trace;
}
static enum rb_id_table_iterator_result
gvar_i(ID key, VALUE val, void *a)
{

View file

@ -3900,3 +3900,38 @@ vm_trace(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE *p
}
}
}
#if VM_CHECK_MODE > 0
static NORETURN( NOINLINE(
#if GCC_VERSION_SINCE(4, 3, 0)
__attribute__((__cold__))
#endif
void vm_canary_is_found_dead(enum ruby_vminsn_type i, VALUE c)));
static VALUE vm_stack_canary;
void
Init_vm_stack_canary(void)
{
/* This has to be called _after_ our PRNG is properly set up. */
int n = fill_random_bytes(&vm_stack_canary, sizeof vm_stack_canary, false);
VM_ASSERT(n == 0);
}
static void
vm_canary_is_found_dead(enum ruby_vminsn_type i, VALUE c)
{
/* Because a method has already been called, why not call
* another one. */
const char *insn = rb_insns_name(i);
VALUE inspection = rb_inspect(c);
const char *str = StringValueCStr(inspection);
VALUE message = rb_sprintf("dead canary found at %s: %s", insn, str);
const char *msg = StringValueCStr(message);
rb_bug(msg);
}
#elif !defined(MJIT_HEADER)
void Init_vm_stack_canary(void) { /* nothing to do */ }
#endif