1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Optimize dynamic string interpolation for symbol/true/false/nil/0-9

This provides a significant speedup for symbol, true, false,
nil, 0-9, and class/module, and a small speedup in most other cases.

Speedups (using included benchmarks):
:symbol        :: 60%
0-9            :: 50%
Class/Module   :: 50%
nil/true/false :: 20%
integer        :: 10%
[]             :: 10%
""             :: 3%

One reason this approach is faster is it reduces the number of
VM instructions for each interpolated value.

Initial idea, approach, and benchmarks from Eric Wong. I applied
the same approach against the master branch, updating it to handle
the significant internal changes since this was first proposed 4
years ago (such as CALL_INFO/CALL_CACHE -> CALL_DATA). I also
expanded it to optimize true/false/nil/0-9/class/module, and added
handling of missing methods, refined methods, and RUBY_DEBUG.

This renames the tostring insn to anytostring, and adds an
objtostring insn that implements the optimization. This requires
making a few functions non-static, and adding some non-static
functions.

This disables 4 YJIT tests.  Those tests should be reenabled after
YJIT optimizes the new objtostring insn.

Implements [Feature #13715]

Co-authored-by: Eric Wong <e@80x24.org>
Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>
Co-authored-by: Yusuke Endoh <mame@ruby-lang.org>
Co-authored-by: Koichi Sasada <ko1@atdot.net>
This commit is contained in:
Jeremy Evans 2021-11-18 15:10:20 -08:00 committed by GitHub
parent 4adb012926
commit b08dacfea3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
Notes: git 2021-11-19 08:10:44 +09:00
Merged: https://github.com/ruby/ruby/pull/5002

Merged-By: jeremyevans <code@jeremyevans.net>
20 changed files with 240 additions and 37 deletions

6
benchmark/vm_dstr_ary.rb Normal file
View file

@ -0,0 +1,6 @@
# Benchmark: dynamic string interpolation of Array values.
# x and y refer to the same empty Array; each iteration builds one string
# with two interpolated values.
i = 0
x = y = []
while i<6_000_000 # benchmark loop 2
i += 1
str = "foo#{x}bar#{y}baz"
end

View file

@ -0,0 +1,7 @@
# Benchmark: dynamic string interpolation of true and false.
# Each iteration builds one string with both boolean values interpolated.
i = 0
x = true
y = false
while i<6_000_000 # benchmark loop 2
i += 1
str = "foo#{x}bar#{y}baz"
end

View file

@ -0,0 +1,10 @@
# Benchmark: dynamic string interpolation of a named Class and a named Module.
i = 0
# Only define A and B when they do not already exist, so the script does not
# reopen or clash with constants that are already defined.
class A; end unless defined?(A)
module B; end unless defined?(B)
x = A
y = B
while i<6_000_000 # benchmark loop 2
i += 1
str = "foo#{x}bar#{y}baz"
end

View file

@ -0,0 +1,7 @@
# Benchmark: dynamic string interpolation of single-digit Integers.
# 0 and 9 are the lowest and highest values of the 0-9 range.
i = 0
x = 0
y = 9
while i<6_000_000 # benchmark loop 2
i += 1
str = "foo#{x}bar#{y}baz"
end

5
benchmark/vm_dstr_int.rb Normal file
View file

@ -0,0 +1,5 @@
# Benchmark: dynamic string interpolation of an Integer that changes every
# iteration (the loop counter itself is interpolated twice).
i = 0
while i<6_000_000 # benchmark loop 2
i += 1
str = "foo#{i}bar#{i}baz"
end

6
benchmark/vm_dstr_nil.rb Normal file
View file

@ -0,0 +1,6 @@
# Benchmark: dynamic string interpolation of nil (interpolated twice per string).
i = 0
x = y = nil
while i<6_000_000 # benchmark loop 2
i += 1
str = "foo#{x}bar#{y}baz"
end

6
benchmark/vm_dstr_obj.rb Normal file
View file

@ -0,0 +1,6 @@
# Benchmark: dynamic string interpolation of a plain Object.
# x and y refer to the same instance; this script does not override to_s on it.
i = 0
x = y = Object.new
while i<6_000_000 # benchmark loop 2
i += 1
str = "foo#{x}bar#{y}baz"
end

View file

@ -0,0 +1,8 @@
# Benchmark: dynamic string interpolation of an object with a user-defined to_s.
i = 0
o = Object.new
# Singleton to_s whose result is -"" (String#-@ applied to an empty literal),
# so the method body itself does as little work as possible.
def o.to_s; -""; end
x = y = o
while i<6_000_000 # benchmark loop 2
i += 1
str = "foo#{x}bar#{y}baz"
end

6
benchmark/vm_dstr_str.rb Normal file
View file

@ -0,0 +1,6 @@
# Benchmark: dynamic string interpolation of values that are already Strings.
# x and y refer to the same empty String.
i = 0
x = y = ""
while i<6_000_000 # benchmark loop 2
i += 1
str = "foo#{x}bar#{y}baz"
end

6
benchmark/vm_dstr_sym.rb Normal file
View file

@ -0,0 +1,6 @@
# Benchmark: dynamic string interpolation of a Symbol (:z, interpolated twice).
i = 0
x = y = :z
while i<6_000_000 # benchmark loop 2
i += 1
str = "foo#{x}bar#{y}baz"
end

View file

@ -9522,6 +9522,7 @@ numeric.$(OBJEXT): $(top_srcdir)/internal/hash.h
numeric.$(OBJEXT): $(top_srcdir)/internal/numeric.h
numeric.$(OBJEXT): $(top_srcdir)/internal/object.h
numeric.$(OBJEXT): $(top_srcdir)/internal/rational.h
numeric.$(OBJEXT): $(top_srcdir)/internal/string.h
numeric.$(OBJEXT): $(top_srcdir)/internal/serial.h
numeric.$(OBJEXT): $(top_srcdir)/internal/static_assert.h
numeric.$(OBJEXT): $(top_srcdir)/internal/util.h
@ -9598,6 +9599,7 @@ numeric.$(OBJEXT): {$(VPATH)}internal/compiler_is/intel.h
numeric.$(OBJEXT): {$(VPATH)}internal/compiler_is/msvc.h
numeric.$(OBJEXT): {$(VPATH)}internal/compiler_is/sunpro.h
numeric.$(OBJEXT): {$(VPATH)}internal/compiler_since.h
numeric.$(OBJEXT): {$(VPATH)}internal/compilers.h
numeric.$(OBJEXT): {$(VPATH)}internal/config.h
numeric.$(OBJEXT): {$(VPATH)}internal/constant_p.h
numeric.$(OBJEXT): {$(VPATH)}internal/core.h

View file

@ -3271,13 +3271,13 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
}
}
if (IS_INSN_ID(iobj, tostring)) {
if (IS_INSN_ID(iobj, anytostring)) {
LINK_ELEMENT *next = iobj->link.next;
/*
* tostring
* anytostring
* concatstrings 1
* =>
* tostring
* anytostring
*/
if (IS_INSN(next) && IS_INSN_ID(next, concatstrings) &&
OPERAND_AT(next, 0) == INT2FIX(1)) {
@ -7642,17 +7642,14 @@ compile_evstr(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, i
CHECK(COMPILE_(ret, "nd_body", node, popped));
if (!popped && !all_string_result_p(node)) {
const int line = nd_line(node);
const NODE *line_node = node;
const unsigned int flag = VM_CALL_FCALL;
LABEL *isstr = NEW_LABEL(line);
ADD_INSN(ret, line_node, dup);
ADD_INSN1(ret, line_node, checktype, INT2FIX(T_STRING));
ADD_INSNL(ret, line_node, branchif, isstr);
ADD_INSN(ret, line_node, dup);
ADD_SEND_R(ret, line_node, idTo_s, INT2FIX(0), NULL, INT2FIX(flag), NULL);
ADD_INSN(ret, line_node, tostring);
ADD_LABEL(ret, isstr);
// Note, this dup could be removed if we are willing to change anytostring. It pops
// two VALUEs off the stack when it could work by replacing the top most VALUE.
ADD_INSN(ret, line_node, dup);
ADD_INSN1(ret, line_node, objtostring, new_callinfo(iseq, idTo_s, 0, flag, NULL, FALSE));
ADD_INSN(ret, line_node, anytostring);
}
return COMPILE_OK;
}

View file

@ -381,9 +381,10 @@ concatstrings
val = rb_str_concat_literals(num, STACK_ADDR_FROM_TOP(num));
}
/* push the result of to_s. */
/* Convert the result to string if not already a string.
This is used as a backup if to_s does not return a string. */
DEFINE_INSN
tostring
anytostring
()
(VALUE val, VALUE str)
(VALUE val)
@ -781,6 +782,21 @@ opt_send_without_block
}
}
/* Convert object to string using to_s or equivalent.
   The conversion is first attempted by vm_objtostring(); a regular call
   described by cd is only made when that helper declines. */
DEFINE_INSN
objtostring
(CALL_DATA cd)
(VALUE recv)
(VALUE val)
// attr bool leaf = false;
{
val = vm_objtostring(GET_ISEQ(), recv, cd);
/* vm_objtostring() returns Qundef when none of its built-in shortcuts
   applies to recv; in that case fall back to CALL_SIMPLE_METHOD().
   NOTE(review): CALL_SIMPLE_METHOD is defined outside this view;
   presumably it performs the ordinary dispatch for cd -- confirm. */
if (val == Qundef) {
CALL_SIMPLE_METHOD();
}
}
DEFINE_INSN
opt_str_freeze
(VALUE str, CALL_DATA cd)

View file

@ -35,6 +35,7 @@
#include "internal/numeric.h"
#include "internal/object.h"
#include "internal/rational.h"
#include "internal/string.h"
#include "internal/util.h"
#include "internal/variable.h"
#include "ruby/encoding.h"
@ -3631,6 +3632,18 @@ rb_fix2str(VALUE x, int base)
return rb_usascii_str_new(b, e - b);
}
static VALUE rb_fix_to_s_static[10];
/*
 * Base-10 string for the fixnum x.
 *
 * Values 0 through 9 are answered from the rb_fix_to_s_static table instead
 * of building a new string; every other value is formatted by rb_fix2str().
 */
MJIT_FUNC_EXPORTED VALUE
rb_fix_to_s(VALUE x)
{
    const long n = FIX2LONG(x);

    /* Outside the single-digit range there is no table entry to reuse. */
    if (n < 0 || n > 9) {
        return rb_fix2str(x, 10);
    }

    return rb_fix_to_s_static[n];
}
/*
* call-seq:
* to_s(base = 10) -> string
@ -3652,8 +3665,8 @@ rb_fix2str(VALUE x, int base)
*
*/
static VALUE
int_to_s(int argc, VALUE *argv, VALUE x)
MJIT_FUNC_EXPORTED VALUE
rb_int_to_s(int argc, VALUE *argv, VALUE x)
{
int base;
@ -5949,7 +5962,7 @@ Init_Numeric(void)
rb_define_singleton_method(rb_cInteger, "sqrt", rb_int_s_isqrt, 1);
rb_define_singleton_method(rb_cInteger, "try_convert", int_s_try_convert, 1);
rb_define_method(rb_cInteger, "to_s", int_to_s, -1);
rb_define_method(rb_cInteger, "to_s", rb_int_to_s, -1);
rb_define_alias(rb_cInteger, "inspect", "to_s");
rb_define_method(rb_cInteger, "allbits?", int_allbits_p, 1);
rb_define_method(rb_cInteger, "anybits?", int_anybits_p, 1);
@ -5999,6 +6012,20 @@ Init_Numeric(void)
rb_define_method(rb_cInteger, "digits", rb_int_digits, -1);
rb_fix_to_s_static[0] = rb_fstring_literal("0");
rb_fix_to_s_static[1] = rb_fstring_literal("1");
rb_fix_to_s_static[2] = rb_fstring_literal("2");
rb_fix_to_s_static[3] = rb_fstring_literal("3");
rb_fix_to_s_static[4] = rb_fstring_literal("4");
rb_fix_to_s_static[5] = rb_fstring_literal("5");
rb_fix_to_s_static[6] = rb_fstring_literal("6");
rb_fix_to_s_static[7] = rb_fstring_literal("7");
rb_fix_to_s_static[8] = rb_fstring_literal("8");
rb_fix_to_s_static[9] = rb_fstring_literal("9");
for(int i = 0; i < 10; i++) {
rb_gc_register_mark_object(rb_fix_to_s_static[i]);
}
/* An obsolete class, use Integer */
rb_define_const(rb_cObject, "Fixnum", rb_cInteger);
rb_deprecate_constant(rb_cObject, "Fixnum");

View file

@ -1248,8 +1248,8 @@ rb_obj_frozen_p(VALUE obj)
* Always returns the empty string.
*/
static VALUE
nil_to_s(VALUE obj)
MJIT_FUNC_EXPORTED VALUE
rb_nil_to_s(VALUE obj)
{
/* Non-static so that vm_objtostring() can pass this function to
   check_cfunc() and call it directly. obj is unused: the result is
   always the string stored in rb_cNilClass_to_s by InitVM_Object(). */
return rb_cNilClass_to_s;
}
@ -1331,8 +1331,8 @@ nil_match(VALUE obj1, VALUE obj2)
* The string representation of <code>true</code> is "true".
*/
static VALUE
true_to_s(VALUE obj)
MJIT_FUNC_EXPORTED VALUE
rb_true_to_s(VALUE obj)
{
/* Non-static so that vm_objtostring() can pass this function to
   check_cfunc() and call it directly. obj is unused: the result is
   always the string stored in rb_cTrueClass_to_s by InitVM_Object(). */
return rb_cTrueClass_to_s;
}
@ -1408,8 +1408,8 @@ true_xor(VALUE obj, VALUE obj2)
* The string representation of <code>false</code> is "false".
*/
static VALUE
false_to_s(VALUE obj)
MJIT_FUNC_EXPORTED VALUE
rb_false_to_s(VALUE obj)
{
/* Non-static so that vm_objtostring() can pass this function to
   check_cfunc() and call it directly. obj is unused: the result is
   always the string stored in rb_cFalseClass_to_s by InitVM_Object(). */
return rb_cFalseClass_to_s;
}
@ -1585,7 +1585,7 @@ rb_obj_cmp(VALUE obj1, VALUE obj2)
* show information on the thing we're attached to as well.
*/
static VALUE
MJIT_FUNC_EXPORTED VALUE
rb_mod_to_s(VALUE klass)
{
ID id_defined_at;
@ -4586,7 +4586,7 @@ InitVM_Object(void)
rb_cNilClass = rb_define_class("NilClass", rb_cObject);
rb_cNilClass_to_s = rb_fstring_enc_lit("", rb_usascii_encoding());
rb_gc_register_mark_object(rb_cNilClass_to_s);
rb_define_method(rb_cNilClass, "to_s", nil_to_s, 0);
rb_define_method(rb_cNilClass, "to_s", rb_nil_to_s, 0);
rb_define_method(rb_cNilClass, "to_a", nil_to_a, 0);
rb_define_method(rb_cNilClass, "to_h", nil_to_h, 0);
rb_define_method(rb_cNilClass, "inspect", nil_inspect, 0);
@ -4668,7 +4668,7 @@ InitVM_Object(void)
rb_cTrueClass = rb_define_class("TrueClass", rb_cObject);
rb_cTrueClass_to_s = rb_fstring_enc_lit("true", rb_usascii_encoding());
rb_gc_register_mark_object(rb_cTrueClass_to_s);
rb_define_method(rb_cTrueClass, "to_s", true_to_s, 0);
rb_define_method(rb_cTrueClass, "to_s", rb_true_to_s, 0);
rb_define_alias(rb_cTrueClass, "inspect", "to_s");
rb_define_method(rb_cTrueClass, "&", true_and, 1);
rb_define_method(rb_cTrueClass, "|", true_or, 1);
@ -4680,7 +4680,7 @@ InitVM_Object(void)
rb_cFalseClass = rb_define_class("FalseClass", rb_cObject);
rb_cFalseClass_to_s = rb_fstring_enc_lit("false", rb_usascii_encoding());
rb_gc_register_mark_object(rb_cFalseClass_to_s);
rb_define_method(rb_cFalseClass, "to_s", false_to_s, 0);
rb_define_method(rb_cFalseClass, "to_s", rb_false_to_s, 0);
rb_define_alias(rb_cFalseClass, "inspect", "to_s");
rb_define_method(rb_cFalseClass, "&", false_and, 1);
rb_define_method(rb_cFalseClass, "|", false_or, 1);

View file

@ -243,8 +243,8 @@ class TestJIT < Test::Unit::TestCase
end;
end
def test_compile_insn_putstring_concatstrings_tostring
assert_compile_once('"a#{}b" + "c"', result_inspect: '"abc"', insns: %i[putstring concatstrings tostring])
def test_compile_insn_putstring_concatstrings_objtostring
assert_compile_once('"a#{}b" + "c"', result_inspect: '"abc"', insns: %i[putstring concatstrings objtostring])
end
def test_compile_insn_toregexp
@ -482,8 +482,8 @@ class TestJIT < Test::Unit::TestCase
end;
end
def test_compile_insn_checktype
assert_compile_once("#{<<~"begin;"}\n#{<<~'end;'}", result_inspect: '"42"', insns: %i[checktype])
def test_compile_insn_objtostring
assert_compile_once("#{<<~"begin;"}\n#{<<~'end;'}", result_inspect: '"42"', insns: %i[objtostring])
begin;
a = '2'
"4#{a}"

View file

@ -903,4 +903,34 @@ class TestRubyOptimization < Test::Unit::TestCase
raise "END"
end;
end
class Objtostring
end
# Checks string interpolation ("#{...}") for the receiver kinds that the
# objtostring instruction treats specially, plus the cases where it must
# not take a shortcut.
def test_objtostring
# BasicObject has no to_s, so interpolation has to raise.
assert_raise(NoMethodError){"#{BasicObject.new}"}
# NOTE(review): assert_redefine_method is defined outside this view;
# presumably it runs each heredoc with the named method redefined -- confirm.
# Every heredoc expects the generic "#<ClassName:0x...>" form, i.e. the
# built-in to_s result must no longer be used once to_s is redefined.
assert_redefine_method('Symbol', 'to_s', <<-'end')
assert_match %r{\A#<Symbol:0x[0-9a-f]+>\z}, "#{:foo}"
end
assert_redefine_method('NilClass', 'to_s', <<-'end')
assert_match %r{\A#<NilClass:0x[0-9a-f]+>\z}, "#{nil}"
end
assert_redefine_method('TrueClass', 'to_s', <<-'end')
assert_match %r{\A#<TrueClass:0x[0-9a-f]+>\z}, "#{true}"
end
assert_redefine_method('FalseClass', 'to_s', <<-'end')
assert_match %r{\A#<FalseClass:0x[0-9a-f]+>\z}, "#{false}"
end
# -1..10 covers the digits 0-9 as well as one value on either side of them.
assert_redefine_method('Integer', 'to_s', <<-'end')
(-1..10).each { |i|
assert_match %r{\A#<Integer:0x[0-9a-f]+>\z}, "#{i}"
}
end
# A named class interpolates as its name; anonymous classes and modules
# interpolate as "#<Class:0x...>" / "#<Module:0x...>".
assert_equal "TestRubyOptimization::Objtostring", "#{Objtostring}"
assert_match %r{\A#<Class:0x[0-9a-f]+>\z}, "#{Class.new}"
assert_match %r{\A#<Module:0x[0-9a-f]+>\z}, "#{Module.new}"
# A to_s that returns a non-String (here 1) must still yield a String:
# the generic "#<Object:0x...>" form is expected instead.
o = Object.new
def o.to_s; 1; end
assert_match %r{\A#<Object:0x[0-9a-f]+>\z}, "#{o}"
end
end

View file

@ -215,7 +215,7 @@ class TestYJIT < Test::Unit::TestCase
def test_compile_tostring
assert_no_exits('"i am a string #{true}"')
end
end if false # Until objtostring supported
def test_compile_opt_aset
assert_compiles('[1,2,3][2] = 4', insns: %i[opt_aset])
@ -240,7 +240,7 @@ class TestYJIT < Test::Unit::TestCase
def test_compile_regexp
assert_no_exits('/#{true}/')
end
end if false # Until objtostring supported
def test_getlocal_with_level
assert_compiles(<<~RUBY, insns: %i[getlocal opt_plus], result: [[7]])
@ -385,7 +385,7 @@ class TestYJIT < Test::Unit::TestCase
make_str("foo", "bar")
make_str("foo", "bar")
RUBY
end
end if false # Until objtostring supported
def test_string_interpolation_cast
assert_compiles(<<~'RUBY', insns: %i[checktype concatstrings tostring], result: "123")
@ -395,7 +395,7 @@ class TestYJIT < Test::Unit::TestCase
make_str(1, 23)
RUBY
end
end if false # Until objtostring supported
def test_checkkeyword
assert_compiles(<<~'RUBY', insns: %i[checkkeyword], result: [2, 5])

View file

@ -4729,6 +4729,70 @@ vm_sendish(
#endif
}
/* object.c */
VALUE rb_nil_to_s(VALUE);
VALUE rb_true_to_s(VALUE);
VALUE rb_false_to_s(VALUE);
/* numeric.c */
VALUE rb_int_to_s(int argc, VALUE *argv, VALUE x);
VALUE rb_fix_to_s(VALUE);
/* variable.c */
VALUE rb_mod_to_s(VALUE);
VALUE rb_mod_name(VALUE);
/*
 * Shortcut used by the objtostring instruction to convert recv to a String
 * without a full method call.
 *
 * iseq - instruction sequence owning the call site (passed to the method search)
 * recv - the value being interpolated
 * cd   - call data describing the to_s call site
 *
 * Returns the String for recv when a shortcut applies, or Qundef when the
 * caller has to perform the to_s call itself. A shortcut is only taken when
 * check_cfunc() accepts the method found for recv together with the expected
 * built-in C function.
 */
static VALUE
vm_objtostring(const rb_iseq_t *iseq, VALUE recv, CALL_DATA cd)
{
    const int type = TYPE(recv);

    // A String is returned untouched, so decide that before paying for the
    // method search; its result would not be looked at for this case.
    if (type == T_STRING) {
        return recv;
    }

    const struct rb_callcache *cc = vm_search_method((VALUE)iseq, cd, recv);

    switch (type) {
      case T_SYMBOL:
        if (check_cfunc(vm_cc_cme(cc), rb_sym_to_s)) {
            // rb_sym_to_s() allocates a mutable string, but since we are only
            // going to use this string for interpolation, it's fine to use the
            // frozen string.
            return rb_sym2str(recv);
        }
        break;
      case T_MODULE:
      case T_CLASS:
        if (check_cfunc(vm_cc_cme(cc), rb_mod_to_s)) {
            // rb_mod_to_s() allocates a mutable string, but since we are only
            // going to use this string for interpolation, it's fine to use the
            // frozen string.
            VALUE val = rb_mod_name(recv);
            if (val == Qnil) {
                // rb_mod_name() gave nil, so there is no name string to
                // reuse; build the representation with rb_mod_to_s().
                val = rb_mod_to_s(recv);
            }
            return val;
        }
        break;
      case T_NIL:
        if (check_cfunc(vm_cc_cme(cc), rb_nil_to_s)) {
            return rb_nil_to_s(recv);
        }
        break;
      case T_TRUE:
        if (check_cfunc(vm_cc_cme(cc), rb_true_to_s)) {
            return rb_true_to_s(recv);
        }
        break;
      case T_FALSE:
        if (check_cfunc(vm_cc_cme(cc), rb_false_to_s)) {
            return rb_false_to_s(recv);
        }
        break;
      case T_FIXNUM:
        // Integer#to_s is registered as rb_int_to_s; rb_fix_to_s() is the
        // argument-less base-10 form of it.
        if (check_cfunc(vm_cc_cme(cc), rb_int_to_s)) {
            return rb_fix_to_s(recv);
        }
        break;
    }

    // No shortcut applies: let the caller dispatch to_s normally.
    return Qundef;
}
static VALUE
vm_opt_str_freeze(VALUE str, int bop, ID id)
{

View file

@ -4286,7 +4286,7 @@ gen_setglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
}
static codegen_status_t
gen_tostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
gen_anytostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
{
// Save the PC and SP because we might make a Ruby call for
// Kernel#set_trace_var
@ -4831,7 +4831,7 @@ yjit_init_codegen(void)
yjit_reg_op(BIN(leave), gen_leave);
yjit_reg_op(BIN(getglobal), gen_getglobal);
yjit_reg_op(BIN(setglobal), gen_setglobal);
yjit_reg_op(BIN(tostring), gen_tostring);
yjit_reg_op(BIN(anytostring), gen_anytostring);
yjit_reg_op(BIN(toregexp), gen_toregexp);
yjit_reg_op(BIN(getspecial), gen_getspecial);
yjit_reg_op(BIN(getclassvariable), gen_getclassvariable);