From 16c6984bb97409029e213154ac4f633ae04af3d8 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Mon, 18 Mar 2019 14:25:47 +0900 Subject: [PATCH] Separate keyword arguments from positional arguments And, allow non-symbol keys as a keyword arugment --- class.c | 3 +- compile.c | 7 ++- hash.c | 8 ++++ internal.h | 1 + parse.y | 12 +---- proc.c | 2 +- vm.c | 16 +++---- vm_args.c | 118 ++++++++++++++++++++++++++++++++++++++++++++---- vm_insnhelper.c | 36 +++++++++------ 9 files changed, 152 insertions(+), 51 deletions(-) diff --git a/class.c b/class.c index 2e8710194d..6b3b6623f1 100644 --- a/class.c +++ b/class.c @@ -1830,8 +1830,7 @@ rb_keyword_error_new(const char *error, VALUE keys) rb_str_cat_cstr(error_message, ": "); while (1) { const VALUE k = RARRAY_AREF(keys, i); - Check_Type(k, T_SYMBOL); /* wrong hash is given to rb_get_kwargs */ - rb_str_append(error_message, rb_sym2str(k)); + rb_str_append(error_message, rb_inspect(k)); if (++i >= len) break; rb_str_cat_cstr(error_message, ", "); } diff --git a/compile.c b/compile.c index cf29da3e1b..63b893d7de 100644 --- a/compile.c +++ b/compile.c @@ -3798,7 +3798,7 @@ compile_keyword_arg(rb_iseq_t *iseq, LINK_ANCHOR *const ret, { if (kw_arg_ptr == NULL) return FALSE; - if (nd_type(root_node) == NODE_HASH && root_node->nd_head && nd_type(root_node->nd_head) == NODE_ARRAY) { + if (nd_type(root_node) == NODE_HASH && !root_node->nd_brace && root_node->nd_head && nd_type(root_node->nd_head) == NODE_ARRAY) { const NODE *node = root_node->nd_head; while (node) { @@ -3806,13 +3806,14 @@ compile_keyword_arg(rb_iseq_t *iseq, LINK_ANCHOR *const ret, assert(nd_type(node) == NODE_ARRAY); if (!key_node) { - if (flag && !root_node->nd_brace) *flag |= VM_CALL_KW_SPLAT; + if (flag) *flag |= VM_CALL_KW_SPLAT; return FALSE; } else if (nd_type(key_node) == NODE_LIT && RB_TYPE_P(key_node->nd_lit, T_SYMBOL)) { /* can be keywords */ } else { + if (flag) *flag |= VM_CALL_KW_SPLAT; return FALSE; } node = node->nd_next; /* skip value node */ @@ -7351,6 +7352,7 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, in ADD_INSN (args, line, concatarray); --argc; } + flag |= VM_CALL_KW_SPLAT; } else if (local_body->param.flags.has_kwrest) { int idx = local_body->local_table_size - local_kwd->rest_start; @@ -7364,6 +7366,7 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, in else { argc++; } + flag |= VM_CALL_KW_SPLAT; } } diff --git a/hash.c b/hash.c index b61784af3a..0f1c460c49 100644 --- a/hash.c +++ b/hash.c @@ -30,6 +30,8 @@ # endif #endif +VALUE rb_no_keyword_hash; + #ifndef HASH_DEBUG #define HASH_DEBUG 0 #endif @@ -3274,6 +3276,8 @@ inspect_hash(VALUE hash, VALUE dummy, int recur) static VALUE rb_hash_inspect(VALUE hash) { + if (hash == rb_no_keyword_hash) + return rb_usascii_str_new2("{(NO KEYWORD)}"); if (RHASH_EMPTY_P(hash)) return rb_usascii_str_new2("{}"); return rb_exec_recursive(inspect_hash, hash, 0); @@ -6273,6 +6277,10 @@ Init_Hash(void) */ rb_define_global_const("ENV", envtbl); + rb_no_keyword_hash = rb_hash_new(); + rb_hash_freeze(rb_no_keyword_hash); + rb_gc_register_mark_object(rb_no_keyword_hash); + /* for callcc */ ruby_register_rollback_func_for_ensure(hash_foreach_ensure, hash_foreach_ensure_rollback); diff --git a/internal.h b/internal.h index 5a846c13e0..2818467c19 100644 --- a/internal.h +++ b/internal.h @@ -2397,6 +2397,7 @@ VALUE rb_str_normalize_ospath(const char *ptr, long len); /* hash.c (export) */ VALUE rb_hash_delete_entry(VALUE hash, VALUE key); VALUE rb_ident_hash_new(void); +extern VALUE rb_no_keyword_hash; /* io.c (export) */ void rb_maygvl_fd_fix_cloexec(int fd); diff --git a/parse.y b/parse.y index 3c4a9d8a8a..4dfdd5d2a3 100644 --- a/parse.y +++ b/parse.y @@ -5134,12 +5134,6 @@ assocs : assoc assocs = tail; } else if (tail) { - if (assocs->nd_head && - !tail->nd_head && nd_type(tail->nd_next) == NODE_ARRAY && - nd_type(tail->nd_next->nd_head) == NODE_HASH) { - /* DSTAR */ - tail = tail->nd_next->nd_head->nd_head; - } assocs = list_concat(assocs, tail); } $$ = assocs; @@ -5177,11 +5171,7 @@ assoc : arg_value tASSOC arg_value | tDSTAR arg_value { /*%%%*/ - if (nd_type($2) == NODE_HASH && - !($2->nd_head && $2->nd_head->nd_alen)) - $$ = 0; - else - $$ = list_append(p, NEW_LIST(0, &@$), $2); + $$ = list_append(p, NEW_LIST(0, &@$), $2); /*% %*/ /*% ripper: assoc_splat!($2) %*/ } diff --git a/proc.c b/proc.c index dd2fcfd957..8f51ff10eb 100644 --- a/proc.c +++ b/proc.c @@ -2683,7 +2683,7 @@ rb_mod_method_location(VALUE mod, ID id) return rb_method_entry_location(me); } -VALUE +MJIT_FUNC_EXPORTED VALUE rb_obj_method_location(VALUE obj, ID id) { return rb_mod_method_location(CLASS_OF(obj), id); diff --git a/vm.c b/vm.c index 157a43967e..c727554483 100644 --- a/vm.c +++ b/vm.c @@ -2779,6 +2779,9 @@ static VALUE core_hash_merge_kwd(VALUE hash, VALUE kw); static VALUE core_hash_merge(VALUE hash, long argc, const VALUE *argv) { + if (hash == rb_no_keyword_hash) { + hash = rb_hash_new(); + } Check_Type(hash, T_HASH); VM_ASSERT(argc % 2 == 0); rb_hash_bulk_insert(argc, argv, hash); @@ -2790,23 +2793,14 @@ m_core_hash_merge_ptr(int argc, VALUE *argv, VALUE recv) { VALUE hash = argv[0]; - REWIND_CFP(core_hash_merge(hash, argc-1, argv+1)); + REWIND_CFP(hash = core_hash_merge(hash, argc-1, argv+1)); return hash; } -static void -kw_check_symbol(VALUE key) -{ - if (!SYMBOL_P(key)) { - rb_raise(rb_eTypeError, "hash key %+"PRIsVALUE" is not a Symbol", - key); - } -} static int kwmerge_i(VALUE key, VALUE value, VALUE hash) { - kw_check_symbol(key); rb_hash_aset(hash, key, value); return ST_CONTINUE; } @@ -2821,6 +2815,8 @@ m_core_hash_merge_kwd(VALUE recv, VALUE hash, VALUE kw) static VALUE core_hash_merge_kwd(VALUE hash, VALUE kw) { + if (RHASH_EMPTY_P(hash) && kw == rb_no_keyword_hash) + return rb_no_keyword_hash; rb_hash_foreach(rb_to_hash_type(kw), kwmerge_i, hash); return hash; } diff --git a/vm_args.c b/vm_args.c index 96fca84cf4..42f6cff93a 100644 --- a/vm_args.c +++ b/vm_args.c @@ -186,12 +186,16 @@ args_rest_array(struct args_info *args) static int keyword_hash_p(VALUE *kw_hash_ptr, VALUE *rest_hash_ptr) { + if (*kw_hash_ptr == rb_no_keyword_hash) { + *kw_hash_ptr = Qnil; + *rest_hash_ptr = Qfalse; + return TRUE; + } *rest_hash_ptr = rb_check_hash_type(*kw_hash_ptr); if (!NIL_P(*rest_hash_ptr)) { - VALUE hash = rb_extract_keywords(rest_hash_ptr); - if (!hash) hash = Qnil; - *kw_hash_ptr = hash; + *kw_hash_ptr = *rest_hash_ptr; + *rest_hash_ptr = Qfalse; return TRUE; } else { @@ -483,7 +487,7 @@ args_setup_kw_parameters(rb_execution_context_t *const ec, const rb_iseq_t *cons static inline void args_setup_kw_rest_parameter(VALUE keyword_hash, VALUE *locals) { - locals[0] = NIL_P(keyword_hash) ? rb_hash_new() : rb_hash_dup(keyword_hash); + locals[0] = NIL_P(keyword_hash) ? rb_no_keyword_hash : rb_hash_dup(keyword_hash); } static inline void @@ -509,6 +513,40 @@ fill_keys_values(st_data_t key, st_data_t val, st_data_t ptr) return ST_CONTINUE; } +static inline VALUE +get_loc(struct rb_calling_info *calling, const struct rb_call_info *ci) +{ + return rb_obj_method_location(calling->recv, ci->mid); +} + +static inline void +rb_warn_last_hash_to_keyword(struct rb_calling_info *calling, const struct rb_call_info *ci) +{ + if (calling->recv == Qundef) return; + VALUE loc = get_loc(calling, ci); + if (NIL_P(loc)) { + rb_warn("The last argument for `%s' is used as the keyword parameter", rb_id2name(ci->mid)); + } + else { + rb_warn("The last argument for `%s' (defined at %s:%d) is used as the keyword parameter", + rb_id2name(ci->mid), RSTRING_PTR(RARRAY_AREF(loc, 0)), FIX2INT(RARRAY_AREF(loc, 1))); + } +} + +static inline void +rb_warn_keyword_to_last_hash(struct rb_calling_info *calling, const struct rb_call_info *ci) +{ + if (calling->recv == Qundef) return; + VALUE loc = get_loc(calling, ci); + if (NIL_P(loc)) { + rb_warn("The keyword argument for `%s' is used as the last parameter", rb_id2name(ci->mid)); + } + else { + rb_warn("The keyword argument for `%s' (defined at %s:%d) is used as the last parameter", + rb_id2name(ci->mid), RSTRING_PTR(RARRAY_AREF(loc, 0)), FIX2INT(RARRAY_AREF(loc, 1))); + } +} + static int setup_parameters_complex(rb_execution_context_t * const ec, const rb_iseq_t * const iseq, struct rb_calling_info *const calling, @@ -520,10 +558,12 @@ setup_parameters_complex(rb_execution_context_t * const ec, const rb_iseq_t * co int opt_pc = 0; int given_argc; int kw_splat = FALSE; + unsigned int kw_flag = ci->flag & (VM_CALL_KWARG | VM_CALL_KW_SPLAT); struct args_info args_body, *args; VALUE keyword_hash = Qnil; VALUE * const orig_sp = ec->cfp->sp; unsigned int i; + int k2n_warned = FALSE; vm_check_canary(ec, orig_sp); /* @@ -551,7 +591,7 @@ setup_parameters_complex(rb_execution_context_t * const ec, const rb_iseq_t * co args->argv = locals; args->rest_dupped = FALSE; - if (ci->flag & VM_CALL_KWARG) { + if (kw_flag & VM_CALL_KWARG) { args->kw_arg = ((struct rb_call_info_with_kwarg *)ci)->kw_arg; if (iseq->body->param.flags.has_kw) { @@ -565,6 +605,7 @@ setup_parameters_complex(rb_execution_context_t * const ec, const rb_iseq_t * co else { args->kw_argv = NULL; given_argc = args_kw_argv_to_hash(args); + kw_flag |= VM_CALL_KW_SPLAT; } } else { @@ -576,8 +617,24 @@ setup_parameters_complex(rb_execution_context_t * const ec, const rb_iseq_t * co args->rest = locals[--args->argc]; args->rest_index = 0; given_argc += RARRAY_LENINT(args->rest) - 1; + if (kw_flag & VM_CALL_KW_SPLAT) { + int len = RARRAY_LENINT(args->rest); + if (len > 0 && RARRAY_AREF(args->rest, len - 1) == rb_no_keyword_hash) { + arg_rest_dup(args); + rb_ary_pop(args->rest); + given_argc--; + kw_flag &= ~VM_CALL_KW_SPLAT; + } + } } else { + if (kw_flag & VM_CALL_KW_SPLAT) { + if (args->argv[args->argc-1] == rb_no_keyword_hash) { + args->argc--; + given_argc--; + kw_flag &= ~VM_CALL_KW_SPLAT; + } + } args->rest = Qfalse; } @@ -598,6 +655,12 @@ setup_parameters_complex(rb_execution_context_t * const ec, const rb_iseq_t * co /* argc check */ if (given_argc < min_argc) { if (given_argc == min_argc - 1 && args->kw_argv) { + /* Warn the following: + * def foo(a, k:1) p [a, k] end + * foo(k:42) #=> [{:k=>42}, 1] + */ + rb_warn_keyword_to_last_hash(calling, ci); + k2n_warned = TRUE; args_stored_kw_argv_to_hash(args); given_argc = args_argc(args); } @@ -613,16 +676,41 @@ setup_parameters_complex(rb_execution_context_t * const ec, const rb_iseq_t * co } } - if (ci->flag & VM_CALL_KW_SPLAT) { + if (kw_flag & VM_CALL_KW_SPLAT) { kw_splat = !iseq->body->param.flags.has_rest; } if (given_argc > min_argc && (iseq->body->param.flags.has_kw || iseq->body->param.flags.has_kwrest || (kw_splat && given_argc > max_argc)) && args->kw_argv == NULL) { - if (args_pop_keyword_hash(args, &keyword_hash)) { - given_argc--; + if (((kw_flag & (VM_CALL_KWARG | VM_CALL_KW_SPLAT)) || !ec->cfp->iseq /* called from C */)) { + if (args_pop_keyword_hash(args, &keyword_hash)) { + given_argc--; + } } + else if (given_argc > max_argc && max_argc >= 0) { + if (args_pop_keyword_hash(args, &keyword_hash)) { + /* Warn the following: + * def foo(k:1) p [k]; end + * foo({k:42}) #=> 42 + */ + if (ec->cfp->iseq) { + /* called from Ruby level */ + rb_warn_last_hash_to_keyword(calling, ci); + } + given_argc--; + } + } + } + else if (!k2n_warned && + !((kw_flag & VM_CALL_KWARG) && iseq->body->param.flags.has_kw) && + given_argc > min_argc && + (kw_flag & (VM_CALL_KWARG | VM_CALL_KW_SPLAT))) { + /* Warn the following: + * def foo(x, opt=1) p [x]; end + * foo(k:42) #=> [{:k=>42}] + */ + rb_warn_keyword_to_last_hash(calling, ci); } if (given_argc > max_argc && max_argc != UNLIMITED_ARGUMENTS) { @@ -683,7 +771,7 @@ setup_parameters_complex(rb_execution_context_t * const ec, const rb_iseq_t * co else if (iseq->body->param.flags.has_kwrest) { args_setup_kw_rest_parameter(keyword_hash, locals + iseq->body->param.keyword->rest_start); } - else if (!NIL_P(keyword_hash) && RHASH_SIZE(keyword_hash) > 0) { + else if (!NIL_P(keyword_hash) && RHASH_SIZE(keyword_hash) > 0 && arg_setup_type == arg_setup_method) { argument_kw_error(ec, iseq, "unknown", rb_hash_keys(keyword_hash)); } else if (kw_splat && NIL_P(keyword_hash)) { @@ -792,6 +880,8 @@ vm_caller_setup_arg_splat(rb_control_frame_t *cfp, struct rb_calling_info *calli CHECK_VM_STACK_OVERFLOW(cfp, len); + if (ptr[len - 1] == rb_no_keyword_hash) len--; + for (i = 0; i < len; i++) { *cfp->sp++ = ptr[i]; } @@ -800,7 +890,7 @@ vm_caller_setup_arg_splat(rb_control_frame_t *cfp, struct rb_calling_info *calli } static inline void -vm_caller_setup_arg_kw(rb_control_frame_t *cfp, struct rb_calling_info *calling, const struct rb_call_info *ci) +vm_caller_setup_arg_kw(rb_control_frame_t *cfp, struct rb_calling_info *calling, const struct rb_call_info *ci, int cfunc) { struct rb_call_info_with_kwarg *ci_kw = (struct rb_call_info_with_kwarg *)ci; const VALUE *const passed_keywords = ci_kw->kw_arg->keywords; @@ -809,6 +899,14 @@ vm_caller_setup_arg_kw(rb_control_frame_t *cfp, struct rb_calling_info *calling, VALUE *sp = cfp->sp; int i; + /* Warn the following: + * def foo(x) p [x] end + * foo(k:42) #=> [{:k=>42}] + */ + if (!cfunc) { + rb_warn_keyword_to_last_hash(calling, ci); + } + for (i=0; iflag & VM_CALL_KW_SPLAT))) { if (LIKELY(rb_simple_iseq_p(iseq))) { rb_control_frame_t *cfp = ec->cfp; - CALLER_SETUP_ARG(cfp, calling, ci); /* splat arg */ + CALLER_SETUP_ARG(cfp, calling, ci, 0); /* splat arg */ if (calling->argc != iseq->body->param.lead_num) { argument_arity_error(ec, iseq, calling->argc, iseq->body->param.lead_num, iseq->body->param.lead_num); @@ -1867,7 +1867,7 @@ vm_callee_setup_arg(rb_execution_context_t *ec, struct rb_calling_info *calling, } else if (rb_iseq_only_optparam_p(iseq)) { rb_control_frame_t *cfp = ec->cfp; - CALLER_SETUP_ARG(cfp, calling, ci); /* splat arg */ + CALLER_SETUP_ARG(cfp, calling, ci, 0); /* splat arg */ const int lead_num = iseq->body->param.lead_num; const int opt_num = iseq->body->param.opt_num; @@ -2214,7 +2214,7 @@ vm_call_cfunc(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb { RB_DEBUG_COUNTER_INC(ccf_cfunc); - CALLER_SETUP_ARG(reg_cfp, calling, ci); + CALLER_SETUP_ARG(reg_cfp, calling, ci, 1); return vm_call_cfunc_with_frame(ec, reg_cfp, calling, ci, cc); } @@ -2256,7 +2256,7 @@ vm_call_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_c VALUE *argv; int argc; - CALLER_SETUP_ARG(cfp, calling, ci); + CALLER_SETUP_ARG(cfp, calling, ci, 1); argc = calling->argc; argv = ALLOCA_N(VALUE, argc); MEMCPY(argv, cfp->sp - argc, VALUE, argc); @@ -2286,7 +2286,7 @@ vm_call_opt_send(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct struct rb_call_info_with_kwarg ci_entry; struct rb_call_cache cc_entry, *cc; - CALLER_SETUP_ARG(reg_cfp, calling, orig_ci); + CALLER_SETUP_ARG(reg_cfp, calling, orig_ci, 1); i = calling->argc - 1; @@ -2303,7 +2303,12 @@ vm_call_opt_send(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct ci = &ci_entry.ci; ci_entry.ci = *orig_ci; } - ci->flag = ci->flag & ~VM_CALL_KWARG; /* TODO: delegate kw_arg without making a Hash object */ + unsigned int kw_splat = 0; + if (ci->flag & VM_CALL_KWARG) { + /* TODO: delegate kw_arg without making a Hash object */ + ci->flag = ci->flag & ~VM_CALL_KWARG; + kw_splat = VM_CALL_KW_SPLAT; + } /* setup new cc */ cc_entry = *orig_cc; @@ -2333,7 +2338,7 @@ vm_call_opt_send(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct } cc->me = rb_callable_method_entry_with_refinements(CLASS_OF(calling->recv), ci->mid, NULL); - ci->flag = VM_CALL_FCALL | VM_CALL_OPT_SEND; + ci->flag = VM_CALL_FCALL | VM_CALL_OPT_SEND | kw_splat; return vm_call_method(ec, reg_cfp, calling, ci, cc); } @@ -2392,7 +2397,7 @@ vm_call_method_missing(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_call_cache cc_entry, *cc; unsigned int argc; - CALLER_SETUP_ARG(reg_cfp, calling, orig_ci); + CALLER_SETUP_ARG(reg_cfp, calling, orig_ci, 0); argc = calling->argc+1; ci_entry.flag = VM_CALL_FCALL | VM_CALL_OPT_SEND; @@ -2597,14 +2602,14 @@ vm_call_method_each_type(rb_execution_context_t *ec, rb_control_frame_t *cfp, st return vm_call_cfunc(ec, cfp, calling, ci, cc); case VM_METHOD_TYPE_ATTRSET: - CALLER_SETUP_ARG(cfp, calling, ci); + CALLER_SETUP_ARG(cfp, calling, ci, 0); rb_check_arity(calling->argc, 1, 1); cc->aux.index = 0; CC_SET_FASTPATH(cc, vm_call_attrset, !((ci->flag & VM_CALL_ARGS_SPLAT) || (ci->flag & VM_CALL_KWARG))); return vm_call_attrset(ec, cfp, calling, ci, cc); case VM_METHOD_TYPE_IVAR: - CALLER_SETUP_ARG(cfp, calling, ci); + CALLER_SETUP_ARG(cfp, calling, ci, 0); rb_check_arity(calling->argc, 0, 0); cc->aux.index = 0; CC_SET_FASTPATH(cc, vm_call_ivar, !(ci->flag & VM_CALL_ARGS_SPLAT)); @@ -2905,7 +2910,7 @@ vm_callee_setup_block_arg(rb_execution_context_t *ec, struct rb_calling_info *ca rb_control_frame_t *cfp = ec->cfp; VALUE arg0; - CALLER_SETUP_ARG(cfp, calling, ci); /* splat arg */ + CALLER_SETUP_ARG(cfp, calling, ci, 1); /* splat arg */ if (arg_setup_type == arg_setup_block && calling->argc == 1 && @@ -2948,6 +2953,7 @@ vm_yield_setup_args(rb_execution_context_t *ec, const rb_iseq_t *iseq, const int calling = &calling_entry; calling->argc = argc; calling->block_handler = block_handler; + calling->recv = Qundef; ci_entry.flag = 0; ci = &ci_entry; @@ -2987,7 +2993,7 @@ vm_invoke_symbol_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, { VALUE val; int argc; - CALLER_SETUP_ARG(ec->cfp, calling, ci); + CALLER_SETUP_ARG(ec->cfp, calling, ci, 0); argc = calling->argc; val = vm_yield_with_symbol(ec, symbol, argc, STACK_ADDR_FROM_TOP(argc), calling->block_handler); POPN(argc); @@ -3001,7 +3007,7 @@ vm_invoke_ifunc_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, { VALUE val; int argc; - CALLER_SETUP_ARG(ec->cfp, calling, ci); + CALLER_SETUP_ARG(ec->cfp, calling, ci, 0); argc = calling->argc; val = vm_yield_with_cfunc(ec, captured, captured->self, argc, STACK_ADDR_FROM_TOP(argc), calling->block_handler, NULL); POPN(argc); /* TODO: should put before C/yield? */