Recompile JIT-ed code without optimization

based on inline cache when JIT cancel happens by that.

This feature was in the original MJIT implementation by Vladimir, but on
merging MJIT to Ruby it was removed for simplification. This commit adds
the functionality again for the following benchmark:

52f05781f6/concurrent-map/bench.rb
(shown float is duration seconds. shorter is better)

* Before
```
$ INHERIT=0 ruby -v bench.rb
ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux]
--
1.6507579649914987

$ INHERIT=0 ruby -v --jit bench.rb
ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux]
--
1.5091587850474752

$ INHERIT=1 ruby -v bench.rb
ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux]
--
1.6124781150138006

$ INHERIT=1 ruby --jit -v bench.rb
ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux]
--
1.7495657080435194 # <-- this
```

* After
```
$ INHERIT=0 ruby -v bench.rb
ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux]
last_commit=Recompile JIT-ed code without optimization
--
1.653559010999743

$ INHERIT=0 ruby --jit -v bench.rb
ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux]
last_commit=Recompile JIT-ed code without optimization
--
1.4738391840364784

$ INHERIT=1 ruby -v bench.rb
ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux]
last_commit=Recompile JIT-ed code without optimization
--
1.645227018976584

$ INHERIT=1 ruby --jit -v bench.rb
ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux]
last_commit=Recompile JIT-ed code without optimization
--
1.523708809982054 # <-- this
```

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67530 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
k0kubun 2019-04-14 04:52:02 +00:00
parent 088df9c8c2
commit 9b6b4674d7
7 changed files with 79 additions and 14 deletions

58
mjit.c
View File

@ -299,16 +299,16 @@ unload_units(void)
verbose(1, "Too many JIT code -- %d units unloaded", units_num - active_units.length);
}
/* Add ISEQ to be JITed in parallel with the current thread.
Unload some JIT codes if there are too many of them. */
void
mjit_add_iseq_to_process(const rb_iseq_t *iseq)
static void
mjit_add_iseq_to_process(const rb_iseq_t *iseq, const struct rb_mjit_compile_info *compile_info)
{
if (!mjit_enabled || pch_status == PCH_FAILED)
return;
iseq->body->jit_func = (mjit_func_t)NOT_READY_JIT_ISEQ_FUNC;
create_unit(iseq);
if (compile_info != NULL)
iseq->body->jit_unit->compile_info = *compile_info;
if (iseq->body->jit_unit == NULL)
/* Failure in creating the unit. */
return;
@ -323,13 +323,19 @@ mjit_add_iseq_to_process(const rb_iseq_t *iseq)
CRITICAL_SECTION_FINISH(3, "in add_iseq_to_process");
}
/* Add ISEQ to be JITed in parallel with the current thread.
Unload some JIT codes if there are too many of them. */
void
rb_mjit_add_iseq_to_process(const rb_iseq_t *iseq)
{
mjit_add_iseq_to_process(iseq, NULL);
}
/* For this timeout seconds, --jit-wait will wait for JIT compilation finish. */
#define MJIT_WAIT_TIMEOUT_SECONDS 60
/* Wait for JIT compilation finish for --jit-wait, and call the function pointer
if the compiled result is not NOT_COMPILED_JIT_ISEQ_FUNC. */
VALUE
mjit_wait_call(rb_execution_context_t *ec, struct rb_iseq_constant_body *body)
static void
mjit_wait(struct rb_iseq_constant_body *body)
{
struct timeval tv;
int tries = 0;
@ -350,13 +356,48 @@ mjit_wait_call(rb_execution_context_t *ec, struct rb_iseq_constant_body *body)
CRITICAL_SECTION_FINISH(3, "in mjit_wait_call for a client wakeup");
rb_thread_wait_for(tv);
}
}
/* Wait for JIT compilation finish for --jit-wait, and call the function pointer
if the compiled result is not NOT_COMPILED_JIT_ISEQ_FUNC. */
VALUE
mjit_wait_call(rb_execution_context_t *ec, struct rb_iseq_constant_body *body)
{
mjit_wait(body);
if ((uintptr_t)body->jit_func <= (uintptr_t)LAST_JIT_ISEQ_FUNC) {
return Qundef;
}
return body->jit_func(ec, ec->cfp);
}
struct rb_mjit_compile_info*
rb_mjit_iseq_compile_info(const struct rb_iseq_constant_body *body)
{
assert(body->jit_unit != NULL);
return &body->jit_unit->compile_info;
}
void
rb_mjit_recompile_iseq(const rb_iseq_t *iseq)
{
if ((ptrdiff_t)iseq->body->jit_func <= (ptrdiff_t)LAST_JIT_ISEQ_FUNC)
return;
verbose(1, "JIT recompile: %s@%s:%d", RSTRING_PTR(iseq->body->location.label),
RSTRING_PTR(rb_iseq_path(iseq)), FIX2INT(iseq->body->location.first_lineno));
CRITICAL_SECTION_START(3, "in rb_mjit_recompile_iseq");
remove_from_list(iseq->body->jit_unit, &active_units);
iseq->body->jit_func = (void *)NOT_ADDED_JIT_ISEQ_FUNC;
add_to_list(iseq->body->jit_unit, &stale_units);
CRITICAL_SECTION_FINISH(3, "in rb_mjit_recompile_iseq");
mjit_add_iseq_to_process(iseq, &iseq->body->jit_unit->compile_info);
if (UNLIKELY(mjit_opts.wait)) {
mjit_wait(iseq->body);
}
}
extern VALUE ruby_archlibdir_path, ruby_prefix_path;
// Initialize header_file, pch_file, libruby_pathflag. Return true on success.
@ -818,6 +859,7 @@ mjit_finish(bool close_handle_p)
free_list(&unit_queue, close_handle_p);
free_list(&active_units, close_handle_p);
free_list(&compact_units, close_handle_p);
free_list(&stale_units, close_handle_p);
finish_conts();
mjit_enabled = false;

14
mjit.h
View File

@ -55,14 +55,24 @@ struct mjit_options {
int max_cache_size;
};
// State of optimization switches
struct rb_mjit_compile_info {
// Disable getinstancevariable/setinstancevariable optimizations based on inline cache
bool disable_ivar_cache;
// Disable send/opt_send_without_block optimizations based on inline cache
bool disable_send_cache;
};
typedef VALUE (*mjit_func_t)(rb_execution_context_t *, rb_control_frame_t *);
RUBY_SYMBOL_EXPORT_BEGIN
RUBY_EXTERN struct mjit_options mjit_opts;
RUBY_EXTERN bool mjit_call_p;
extern void mjit_add_iseq_to_process(const rb_iseq_t *iseq);
extern void rb_mjit_add_iseq_to_process(const rb_iseq_t *iseq);
extern VALUE mjit_wait_call(rb_execution_context_t *ec, struct rb_iseq_constant_body *body);
extern struct rb_mjit_compile_info* rb_mjit_iseq_compile_info(const struct rb_iseq_constant_body *body);
extern void rb_mjit_recompile_iseq(const rb_iseq_t *iseq);
RUBY_SYMBOL_EXPORT_END
extern bool mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname);
@ -120,7 +130,7 @@ mjit_exec(rb_execution_context_t *ec)
RB_DEBUG_COUNTER_INC(mjit_exec_not_added);
if (total_calls == mjit_opts.min_calls && mjit_target_iseq_p(body)) {
RB_DEBUG_COUNTER_INC(mjit_exec_not_added_add_iseq);
mjit_add_iseq_to_process(iseq);
rb_mjit_add_iseq_to_process(iseq);
if (UNLIKELY(mjit_opts.wait)) {
return mjit_wait_call(ec, body);
}

View File

@ -37,6 +37,8 @@ struct compile_status {
// Safely-accessible cache entries copied from main thread.
union iseq_inline_storage_entry *is_entries;
struct rb_call_cache *cc_entries;
// Mutated optimization levels
struct rb_mjit_compile_info *compile_info;
};
/* Storage to keep data which is consistent in each conditional branch.
@ -213,6 +215,7 @@ mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname)
alloca(sizeof(struct rb_call_cache) * (body->ci_size + body->ci_kw_size)) : NULL,
.is_entries = (body->is_size > 0) ?
alloca(sizeof(union iseq_inline_storage_entry) * body->is_size) : NULL,
.compile_info = rb_mjit_iseq_compile_info(body),
};
memset(status.stack_size_for_pos, NOT_COMPILED_STACK_SIZE, sizeof(int) * body->iseq_size);
if ((status.cc_entries != NULL || status.is_entries != NULL)
@ -235,6 +238,7 @@ mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname)
else {
fprintf(f, " VALUE *stack = reg_cfp->sp;\n");
}
fprintf(f, " static const rb_iseq_t *original_iseq = 0x%"PRIxVALUE";\n", (VALUE)iseq);
fprintf(f, " static const VALUE *const original_body_iseq = (VALUE *)0x%"PRIxVALUE";\n",
(VALUE)body->iseq_encoded);

View File

@ -144,6 +144,8 @@ struct rb_mjit_unit {
/* Only used by unload_units. Flag to check this unit is currently on stack or not. */
char used_code_p;
struct list_node unode;
// mjit_compile's optimization switches
struct rb_mjit_compile_info compile_info;
};
/* Linked list of struct rb_mjit_unit. */
@ -184,6 +186,8 @@ static struct rb_mjit_unit_list unit_queue = { LIST_HEAD_INIT(unit_queue.head) }
static struct rb_mjit_unit_list active_units = { LIST_HEAD_INIT(active_units.head) };
/* List of compacted so files which will be cleaned up by `free_list()` in `mjit_finish()`. */
static struct rb_mjit_unit_list compact_units = { LIST_HEAD_INIT(compact_units.head) };
// List of units before recompilation and just waiting for dlclose().
static struct rb_mjit_unit_list stale_units = { LIST_HEAD_INIT(stale_units.head) };
/* The number of so far processed ISEQs, used to generate unique id. */
static int current_unit_num;
/* A mutex for conitionals and critical sections. */

View File

@ -10,6 +10,7 @@ class TestJIT < Test::Unit::TestCase
include JITSupport
IGNORABLE_PATTERNS = [
/\AJIT recompile: .+\n\z/,
/\ASuccessful MJIT finish\n\z/,
]
@ -529,7 +530,7 @@ class TestJIT < Test::Unit::TestCase
end;
# send call -> optimized call (send JIT) -> optimized call
assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: '122', success_count: 1, min_calls: 2)
assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: '122', success_count: 2, min_calls: 2)
begin;
obj = Object.new
def obj.[](h)
@ -704,7 +705,7 @@ class TestJIT < Test::Unit::TestCase
end
def test_inlined_undefined_ivar
assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: "bbb", success_count: 2, min_calls: 3)
assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: "bbb", success_count: 3, min_calls: 3)
begin;
class Foo
def initialize

View File

@ -17,7 +17,7 @@
IC ic_copy = &(status->is_entries + ((union iseq_inline_storage_entry *)ic - body->is_entries))->cache;
%
% # compiler: Consider cfp->self as T_OBJECT if ic_copy->ic_serial is set
if (ic_copy->ic_serial) {
if (!status->compile_info->disable_ivar_cache && ic_copy->ic_serial) {
% # JIT: optimize away motion of sp and pc. This path does not call rb_warning() and so it's always leaf and not `handles_sp`.
% # <%= render 'mjit_compile_pc_and_sp', locals: { insn: insn } -%>
%
@ -43,6 +43,8 @@
fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos);
fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size);
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_ivar);\n");
fprintf(f, " rb_mjit_iseq_compile_info(original_iseq->body)->disable_ivar_cache = true;\n");
fprintf(f, " rb_mjit_recompile_iseq(original_iseq);\n");
fprintf(f, " goto cancel;\n");
fprintf(f, " }\n");

View File

@ -16,7 +16,7 @@
% # compiler: Use copied cc to avoid race condition
CALL_CACHE cc_copy = status->cc_entries + (cc - body->cc_entries);
%
if (has_valid_method_type(cc_copy)) {
if (!status->compile_info->disable_send_cache && has_valid_method_type(cc_copy)) {
const rb_iseq_t *iseq;
unsigned int argc = ci->orig_argc; // this `argc` variable is for calculating a value's position on stack considering `blockarg`.
% if insn.name == 'send'
@ -39,6 +39,8 @@
fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos);
fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size);
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_send_inline);\n");
fprintf(f, " rb_mjit_iseq_compile_info(original_iseq->body)->disable_send_cache = true;\n");
fprintf(f, " rb_mjit_recompile_iseq(original_iseq);\n");
fprintf(f, " goto cancel;\n");
fprintf(f, " }\n");