1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/vm_insnhelper.h
eileencodes b91b3bc771 Add a cache for class variables
Redo of 34a2acdac7 and
931138b006 which were reverted.

GitHub PR #4340.

This change implements a cache for class variables. Previously there was
no cache for cvars. Cvar access is slow due to needing to travel all the
way up th ancestor tree before returning the cvar value. The deeper the
ancestor tree the slower cvar access will be.

The benefits of the cache are more visible with a higher number of
included modules due to the way Ruby looks up class variables. The
benchmark here includes 26 modules and shows with the cache, this branch
is 6.5x faster when accessing class variables.

```
compare-ruby: ruby 3.1.0dev (2021-03-15T06:22:34Z master 9e5105c) [x86_64-darwin19]
built-ruby: ruby 3.1.0dev (2021-03-15T12:12:44Z add-cache-for-clas.. c6be009) [x86_64-darwin19]

|         |compare-ruby|built-ruby|
|:--------|-----------:|---------:|
|vm_cvar  |      5.681M|   36.980M|
|         |           -|     6.51x|
```

Benchmark.ips calling `ActiveRecord::Base.logger` from within a Rails
application. ActiveRecord::Base.logger has 71 ancestors. The more
ancestors a tree has, the more clear the speed increase. IE if Base had
only one ancestor we'd see no improvement. This benchmark is run on a
vanilla Rails application.

Benchmark code:

```ruby
require "benchmark/ips"
require_relative "config/environment"

Benchmark.ips do |x|
  x.report "logger" do
    ActiveRecord::Base.logger
  end
end
```

Ruby 3.0 master / Rails 6.1:

```
Warming up --------------------------------------
              logger   155.251k i/100ms
Calculating -------------------------------------
```

Ruby 3.0 with cvar cache /  Rails 6.1:

```
Warming up --------------------------------------
              logger     1.546M i/100ms
Calculating -------------------------------------
              logger     14.857M (± 4.8%) i/s -     74.198M in   5.006202s
```

Lastly we ran a benchmark to demonstate the difference between master
and our cache when the number of modules increases. This benchmark
measures 1 ancestor, 30 ancestors, and 100 ancestors.

Ruby 3.0 master:

```
Warming up --------------------------------------
            1 module     1.231M i/100ms
          30 modules   432.020k i/100ms
         100 modules   145.399k i/100ms
Calculating -------------------------------------
            1 module     12.210M (± 2.1%) i/s -     61.553M in   5.043400s
          30 modules      4.354M (± 2.7%) i/s -     22.033M in   5.063839s
         100 modules      1.434M (± 2.9%) i/s -      7.270M in   5.072531s

Comparison:
            1 module: 12209958.3 i/s
          30 modules:  4354217.8 i/s - 2.80x  (± 0.00) slower
         100 modules:  1434447.3 i/s - 8.51x  (± 0.00) slower
```

Ruby 3.0 with cvar cache:

```
Warming up --------------------------------------
            1 module     1.641M i/100ms
          30 modules     1.655M i/100ms
         100 modules     1.620M i/100ms
Calculating -------------------------------------
            1 module     16.279M (± 3.8%) i/s -     82.038M in   5.046923s
          30 modules     15.891M (± 3.9%) i/s -     79.459M in   5.007958s
         100 modules     16.087M (± 3.6%) i/s -     81.005M in   5.041931s

Comparison:
            1 module: 16279458.0 i/s
         100 modules: 16087484.6 i/s - same-ish: difference falls within error
          30 modules: 15891406.2 i/s - same-ish: difference falls within error
```

Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
2021-06-18 10:02:44 -07:00

260 lines
8.2 KiB
C

#ifndef RUBY_INSNHELPER_H
#define RUBY_INSNHELPER_H
/**********************************************************************
insnhelper.h - helper macros to implement each instructions
$Author$
created at: 04/01/01 15:50:34 JST
Copyright (C) 2004-2007 Koichi Sasada
**********************************************************************/
MJIT_SYMBOL_EXPORT_BEGIN
RUBY_EXTERN VALUE ruby_vm_const_missing_count;
RUBY_EXTERN rb_serial_t ruby_vm_global_constant_state;
RUBY_EXTERN rb_serial_t ruby_vm_class_serial;
RUBY_EXTERN rb_serial_t ruby_vm_global_cvar_state;
MJIT_SYMBOL_EXPORT_END
#if VM_COLLECT_USAGE_DETAILS
#define COLLECT_USAGE_INSN(insn) vm_collect_usage_insn(insn)
#define COLLECT_USAGE_OPERAND(insn, n, op) vm_collect_usage_operand((insn), (n), ((VALUE)(op)))
#define COLLECT_USAGE_REGISTER(reg, s) vm_collect_usage_register((reg), (s))
#else
#define COLLECT_USAGE_INSN(insn) /* none */
#define COLLECT_USAGE_OPERAND(insn, n, op) /* none */
#define COLLECT_USAGE_REGISTER(reg, s) /* none */
#endif
/**********************************************************/
/* deal with stack */
/**********************************************************/
#define PUSH(x) (SET_SV(x), INC_SP(1))
#define TOPN(n) (*(GET_SP()-(n)-1))
#define POPN(n) (DEC_SP(n))
#define POP() (DEC_SP(1))
#define STACK_ADDR_FROM_TOP(n) (GET_SP()-(n))
/**********************************************************/
/* deal with registers */
/**********************************************************/
#define VM_REG_CFP (reg_cfp)
#define VM_REG_PC (VM_REG_CFP->pc)
#define VM_REG_SP (VM_REG_CFP->sp)
#define VM_REG_EP (VM_REG_CFP->ep)
#define RESTORE_REGS() do { \
VM_REG_CFP = ec->cfp; \
} while (0)
#if VM_COLLECT_USAGE_DETAILS
enum vm_regan_regtype {
VM_REGAN_PC = 0,
VM_REGAN_SP = 1,
VM_REGAN_EP = 2,
VM_REGAN_CFP = 3,
VM_REGAN_SELF = 4,
VM_REGAN_ISEQ = 5
};
enum vm_regan_acttype {
VM_REGAN_ACT_GET = 0,
VM_REGAN_ACT_SET = 1
};
#define COLLECT_USAGE_REGISTER_HELPER(a, b, v) \
(COLLECT_USAGE_REGISTER((VM_REGAN_##a), (VM_REGAN_ACT_##b)), (v))
#else
#define COLLECT_USAGE_REGISTER_HELPER(a, b, v) (v)
#endif
/* PC */
#define GET_PC() (COLLECT_USAGE_REGISTER_HELPER(PC, GET, VM_REG_PC))
#define SET_PC(x) (VM_REG_PC = (COLLECT_USAGE_REGISTER_HELPER(PC, SET, (x))))
#define GET_CURRENT_INSN() (*GET_PC())
#define GET_OPERAND(n) (GET_PC()[(n)])
#define ADD_PC(n) (SET_PC(VM_REG_PC + (n)))
#define JUMP(dst) (SET_PC(VM_REG_PC + (dst)))
/* frame pointer, environment pointer */
#define GET_CFP() (COLLECT_USAGE_REGISTER_HELPER(CFP, GET, VM_REG_CFP))
#define GET_EP() (COLLECT_USAGE_REGISTER_HELPER(EP, GET, VM_REG_EP))
#define SET_EP(x) (VM_REG_EP = (COLLECT_USAGE_REGISTER_HELPER(EP, SET, (x))))
#define GET_LEP() (VM_EP_LEP(GET_EP()))
/* SP */
#define GET_SP() (COLLECT_USAGE_REGISTER_HELPER(SP, GET, VM_REG_SP))
#define SET_SP(x) (VM_REG_SP = (COLLECT_USAGE_REGISTER_HELPER(SP, SET, (x))))
#define INC_SP(x) (VM_REG_SP += (COLLECT_USAGE_REGISTER_HELPER(SP, SET, (x))))
#define DEC_SP(x) (VM_REG_SP -= (COLLECT_USAGE_REGISTER_HELPER(SP, SET, (x))))
#define SET_SV(x) (*GET_SP() = rb_ractor_confirm_belonging(x))
/* set current stack value as x */
/* instruction sequence C struct */
#define GET_ISEQ() (GET_CFP()->iseq)
/**********************************************************/
/* deal with variables */
/**********************************************************/
#define GET_PREV_EP(ep) ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03))
/**********************************************************/
/* deal with values */
/**********************************************************/
#define GET_SELF() (COLLECT_USAGE_REGISTER_HELPER(SELF, GET, GET_CFP()->self))
/**********************************************************/
/* deal with control flow 2: method/iterator */
/**********************************************************/
/* set fastpath when cached method is *NOT* protected
* because inline method cache does not care about receiver.
*/
static inline void
CC_SET_FASTPATH(const struct rb_callcache *cc, vm_call_handler func, bool enabled)
{
if (LIKELY(enabled)) {
vm_cc_call_set(cc, func);
}
}
#define GET_BLOCK_HANDLER() (GET_LEP()[VM_ENV_DATA_INDEX_SPECVAL])
/**********************************************************/
/* deal with control flow 3: exception */
/**********************************************************/
/**********************************************************/
/* deal with stack canary */
/**********************************************************/
#if VM_CHECK_MODE > 0
#define SETUP_CANARY(cond) \
VALUE *canary = 0; \
if (cond) { \
canary = GET_SP(); \
SET_SV(vm_stack_canary); \
} \
else {\
SET_SV(Qfalse); /* cleanup */ \
}
#define CHECK_CANARY(cond, insn) \
if (cond) { \
if (*canary == vm_stack_canary) { \
*canary = Qfalse; /* cleanup */ \
} \
else { \
rb_vm_canary_is_found_dead(insn, *canary); \
} \
}
#else
#define SETUP_CANARY(cond) if (cond) {} else {}
#define CHECK_CANARY(cond, insn) if (cond) {(void)(insn);}
#endif
/**********************************************************/
/* others */
/**********************************************************/
#ifndef MJIT_HEADER
#define CALL_SIMPLE_METHOD() do { \
rb_snum_t x = leaf ? INSN_ATTR(width) : 0; \
rb_snum_t y = attr_width_opt_send_without_block(0); \
rb_snum_t z = x - y; \
ADD_PC(z); \
DISPATCH_ORIGINAL_INSN(opt_send_without_block); \
} while (0)
#endif
#define PREV_CLASS_SERIAL() (ruby_vm_class_serial)
#define NEXT_CLASS_SERIAL() (++ruby_vm_class_serial)
#define GET_GLOBAL_CONSTANT_STATE() (ruby_vm_global_constant_state)
#define INC_GLOBAL_CONSTANT_STATE() (++ruby_vm_global_constant_state)
#define GET_GLOBAL_CVAR_STATE() (ruby_vm_global_cvar_state)
#define INC_GLOBAL_CVAR_STATE() (++ruby_vm_global_cvar_state)
static inline struct vm_throw_data *
THROW_DATA_NEW(VALUE val, const rb_control_frame_t *cf, int st)
{
struct vm_throw_data *obj = (struct vm_throw_data *)rb_imemo_new(imemo_throw_data, val, (VALUE)cf, 0, 0);
obj->throw_state = st;
return obj;
}
static inline VALUE
THROW_DATA_VAL(const struct vm_throw_data *obj)
{
VM_ASSERT(THROW_DATA_P(obj));
return obj->throw_obj;
}
static inline const rb_control_frame_t *
THROW_DATA_CATCH_FRAME(const struct vm_throw_data *obj)
{
VM_ASSERT(THROW_DATA_P(obj));
return obj->catch_frame;
}
static inline int
THROW_DATA_STATE(const struct vm_throw_data *obj)
{
VM_ASSERT(THROW_DATA_P(obj));
return obj->throw_state;
}
static inline int
THROW_DATA_CONSUMED_P(const struct vm_throw_data *obj)
{
VM_ASSERT(THROW_DATA_P(obj));
return obj->flags & THROW_DATA_CONSUMED;
}
static inline void
THROW_DATA_CATCH_FRAME_SET(struct vm_throw_data *obj, const rb_control_frame_t *cfp)
{
VM_ASSERT(THROW_DATA_P(obj));
obj->catch_frame = cfp;
}
static inline void
THROW_DATA_STATE_SET(struct vm_throw_data *obj, int st)
{
VM_ASSERT(THROW_DATA_P(obj));
obj->throw_state = st;
}
static inline void
THROW_DATA_CONSUMED_SET(struct vm_throw_data *obj)
{
if (THROW_DATA_P(obj) &&
THROW_DATA_STATE(obj) == TAG_BREAK) {
obj->flags |= THROW_DATA_CONSUMED;
}
}
#define IS_ARGS_SPLAT(ci) (vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT)
#define IS_ARGS_KEYWORD(ci) (vm_ci_flag(ci) & VM_CALL_KWARG)
#define IS_ARGS_KW_SPLAT(ci) (vm_ci_flag(ci) & VM_CALL_KW_SPLAT)
#define IS_ARGS_KW_OR_KW_SPLAT(ci) (vm_ci_flag(ci) & (VM_CALL_KWARG | VM_CALL_KW_SPLAT))
#define IS_ARGS_KW_SPLAT_MUT(ci) (vm_ci_flag(ci) & VM_CALL_KW_SPLAT_MUT)
/* If this returns true, an optimized function returned by `vm_call_iseq_setup_func`
can be used as a fastpath. */
static inline bool
vm_call_iseq_optimizable_p(const struct rb_callinfo *ci, const struct rb_callcache *cc)
{
return !IS_ARGS_SPLAT(ci) && !IS_ARGS_KEYWORD(ci) &&
METHOD_ENTRY_CACHEABLE(vm_cc_cme(cc));
}
#endif /* RUBY_INSNHELPER_H */