ast.rb: RubyVM::AST.parse and .of accept `save_script_lines: true`

This option makes the parser keep the original source as an array of
code lines. The feature reuses the `SCRIPT_LINES__` mechanism, but
records only the code that is passed to RubyVM::AST.of or .parse,
instead of recording every parsed program text.
This commit is contained in:
Yusuke Endoh 2021-06-17 23:43:08 +09:00
parent c639b58823
commit acae5f363d
Notes: git 2021-06-18 02:34:57 +09:00
8 changed files with 145 additions and 22 deletions

48
ast.c
View File

@ -64,8 +64,8 @@ ast_new_internal(rb_ast_t *ast, const NODE *node)
return obj;
}
static VALUE rb_ast_parse_str(VALUE str);
static VALUE rb_ast_parse_file(VALUE path);
static VALUE rb_ast_parse_str(VALUE str, VALUE save_script_lines);
static VALUE rb_ast_parse_file(VALUE path, VALUE save_script_lines);
static VALUE
ast_parse_new(void)
@ -85,29 +85,31 @@ ast_parse_done(rb_ast_t *ast)
}
static VALUE
ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str)
ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str, VALUE save_script_lines)
{
return rb_ast_parse_str(str);
return rb_ast_parse_str(str, save_script_lines);
}
static VALUE
rb_ast_parse_str(VALUE str)
rb_ast_parse_str(VALUE str, VALUE save_script_lines)
{
rb_ast_t *ast = 0;
StringValue(str);
ast = rb_parser_compile_string_path(ast_parse_new(), Qnil, str, 1);
VALUE vparser = ast_parse_new();
if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser);
ast = rb_parser_compile_string_path(vparser, Qnil, str, 1);
return ast_parse_done(ast);
}
static VALUE
ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path)
ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path, VALUE save_script_lines)
{
return rb_ast_parse_file(path);
return rb_ast_parse_file(path, save_script_lines);
}
static VALUE
rb_ast_parse_file(VALUE path)
rb_ast_parse_file(VALUE path, VALUE save_script_lines)
{
VALUE f;
rb_ast_t *ast = 0;
@ -116,7 +118,9 @@ rb_ast_parse_file(VALUE path)
FilePathValue(path);
f = rb_file_open_str(path, "r");
rb_funcall(f, rb_intern("set_encoding"), 2, rb_enc_from_encoding(enc), rb_str_new_cstr("-"));
ast = rb_parser_compile_file_path(ast_parse_new(), Qnil, f, 1);
VALUE vparser = ast_parse_new();
if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser);
ast = rb_parser_compile_file_path(vparser, Qnil, f, 1);
rb_io_close(f);
return ast_parse_done(ast);
}
@ -135,12 +139,14 @@ lex_array(VALUE array, int index)
}
static VALUE
rb_ast_parse_array(VALUE array)
rb_ast_parse_array(VALUE array, VALUE save_script_lines)
{
rb_ast_t *ast = 0;
array = rb_check_array_type(array);
ast = rb_parser_compile_generic(ast_parse_new(), lex_array, Qnil, array, 1);
VALUE vparser = ast_parse_new();
if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser);
ast = rb_parser_compile_generic(vparser, lex_array, Qnil, array, 1);
return ast_parse_done(ast);
}
@ -187,7 +193,7 @@ script_lines(VALUE path)
}
static VALUE
ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body)
ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body, VALUE save_script_lines)
{
VALUE path, node, lines;
int node_id;
@ -209,13 +215,13 @@ ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body)
path = rb_iseq_path(iseq);
node_id = iseq->body->location.node_id;
if (!NIL_P(lines = script_lines(path))) {
node = rb_ast_parse_array(lines);
node = rb_ast_parse_array(lines, save_script_lines);
}
else if (RSTRING_LEN(path) == 2 && memcmp(RSTRING_PTR(path), "-e", 2) == 0) {
node = rb_ast_parse_str(rb_e_script);
node = rb_ast_parse_str(rb_e_script, save_script_lines);
}
else {
node = rb_ast_parse_file(path);
node = rb_ast_parse_file(path, save_script_lines);
}
return node_find(node, node_id);
@ -698,6 +704,16 @@ ast_node_inspect(rb_execution_context_t *ec, VALUE self)
return str;
}
/*
 * Backend of Node#script_lines.
 *
 * Looks up the AST that owns +self+ and returns the script lines stored in
 * its body.  A zero (unset) slot is reported to Ruby as nil.
 */
static VALUE
ast_node_script_lines(rb_execution_context_t *ec, VALUE self)
{
    struct ASTNodeData *data;
    VALUE lines;

    TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data);
    lines = data->ast->body.script_lines;

    /* The slot is zero when nothing was recorded; map that to nil. */
    return lines ? lines : Qnil;
}
#include "ast.rbinc"
void

48
ast.rb
View File

@ -29,8 +29,8 @@ module RubyVM::AbstractSyntaxTree
#
# RubyVM::AbstractSyntaxTree.parse("x = 1 + 2")
# # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-1:9>
def self.parse string
Primitive.ast_s_parse string
def self.parse string, save_script_lines: false
Primitive.ast_s_parse string, save_script_lines
end
# call-seq:
@ -44,8 +44,8 @@ module RubyVM::AbstractSyntaxTree
#
# RubyVM::AbstractSyntaxTree.parse_file("my-app/app.rb")
# # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-31:3>
def self.parse_file pathname
Primitive.ast_s_parse_file pathname
def self.parse_file pathname, save_script_lines: false
Primitive.ast_s_parse_file pathname, save_script_lines
end
# call-seq:
@ -63,8 +63,8 @@ module RubyVM::AbstractSyntaxTree
#
# RubyVM::AbstractSyntaxTree.of(method(:hello))
# # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-3:3>
def self.of body
Primitive.ast_s_of body
def self.of body, save_script_lines: false
Primitive.ast_s_of body, save_script_lines
end
# RubyVM::AbstractSyntaxTree::Node instances are created by parse methods in
@ -139,5 +139,41 @@ module RubyVM::AbstractSyntaxTree
def inspect
Primitive.ast_node_inspect
end
# call-seq:
# node.script_lines -> array or nil
#
# Returns the original source code as an array of lines.
# Returns nil when no lines were recorded for the AST; lines are
# recorded only when the AST was created with
# <code>save_script_lines: true</code>.
#
# Note that this is an API for ruby internal use, debugging,
# and research. Do not use this for any other purpose.
# The compatibility is not guaranteed.
def script_lines
Primitive.ast_node_script_lines
end
# call-seq:
# node.source -> string or nil
#
# Returns the code fragment that corresponds to this AST.
# Returns nil when the script lines are not available
# (see #script_lines).
#
# Note that this is an API for ruby internal use, debugging,
# and research. Do not use this for any other purpose.
# The compatibility is not guaranteed.
#
# Also note that this API may return an incomplete code fragment
# that does not parse; for example, a here document following
# an expression may be dropped.
def source
  lines = script_lines
  return nil unless lines

  # Array#[] with a range returns a new array, so replacing its elements
  # below leaves the saved script lines untouched.
  fragment = lines[first_lineno - 1..last_lineno - 1]

  # The parser reports columns as byte offsets, so slice by bytes rather
  # than by characters; otherwise multibyte text shifts the fragment.
  # Trim the tail before the head so a single-line node is cut correctly.
  fragment[-1] = fragment[-1].byteslice(0...last_column)
  fragment[0] = fragment[0].byteslice(first_column..-1)
  fragment.join
end
end
end

View File

@ -1329,6 +1329,7 @@ new_child_iseq(rb_iseq_t *iseq, const NODE *const node,
ast.root = node;
ast.compile_option = 0;
ast.line_count = -1;
ast.script_lines = Qfalse;
debugs("[new_child_iseq]> ---------------------------------------\n");
int isolated_depth = ISEQ_COMPILE_DATA(iseq)->isolated_depth;

View File

@ -15,6 +15,7 @@ struct rb_iseq_struct; /* in vm_core.h */
/* parse.y */
VALUE rb_parser_set_yydebug(VALUE, VALUE);
void *rb_parser_load_file(VALUE parser, VALUE name);
void rb_parser_save_script_lines(VALUE vparser);
RUBY_SYMBOL_EXPORT_BEGIN
VALUE rb_parser_set_context(VALUE, const struct rb_iseq_struct *, int);

1
node.c
View File

@ -1407,6 +1407,7 @@ rb_ast_mark(rb_ast_t *ast)
iterate_node_values(&nb->markable, mark_ast_value, NULL);
}
if (ast->body.script_lines) rb_gc_mark(ast->body.script_lines);
}
void

1
node.h
View File

@ -399,6 +399,7 @@ typedef struct rb_ast_body_struct {
const NODE *root;
VALUE compile_option;
int line_count;
VALUE script_lines;
} rb_ast_body_t;
typedef struct rb_ast_struct {
VALUE flags;

17
parse.y
View File

@ -337,6 +337,7 @@ struct parser_params {
unsigned int do_loop: 1;
unsigned int do_chomp: 1;
unsigned int do_split: 1;
unsigned int save_script_lines: 1;
NODE *eval_tree_begin;
NODE *eval_tree;
@ -6241,6 +6242,13 @@ yycompile0(VALUE arg)
cov = Qtrue;
}
}
if (p->save_script_lines) {
if (!p->debug_lines) {
p->debug_lines = rb_ary_new();
}
RB_OBJ_WRITE(p->ast, &p->ast->body.script_lines, p->debug_lines);
}
parser_prepare(p);
#define RUBY_DTRACE_PARSE_HOOK(name) \
@ -13186,6 +13194,15 @@ rb_parser_set_context(VALUE vparser, const struct rb_iseq_struct *base, int main
p->parent_iseq = base;
return vparser;
}
/*
 * Turns on the save_script_lines flag of the parser wrapped by +vparser+.
 *
 * When the flag is set, yycompile0 makes sure the parser has a debug_lines
 * array and stores it in the AST body's script_lines slot.
 */
void
rb_parser_save_script_lines(VALUE vparser)
{
struct parser_params *p;
TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p);
p->save_script_lines = 1;
}
#endif
#ifdef RIPPER

View File

@ -372,4 +372,54 @@ class TestAst < Test::Unit::TestCase
_, args = *node.children.last.children[1].children
assert_equal(:a, args.children[rest])
end
# Checks that parse(..., save_script_lines: true) records the parsed lines
# (up to and including the __END__ marker) and that Node#source returns the
# matching fragment for the root node and for nested block nodes.
def test_save_script_lines_for_parse
  # The squiggly heredoc strips the common two-space margin, leaving the
  # nested lines with the one-space indent that the expectations rely on.
  node = RubyVM::AbstractSyntaxTree.parse(<<~END, save_script_lines: true)
  1.times do
   2.times do
   end
  end
  __END__
  dummy
  END

  # Lines after __END__ are not part of the program and are not recorded.
  expected = [
    "1.times do\n",
    " 2.times do\n",
    " end\n",
    "end\n",
    "__END__\n",
  ]
  assert_equal(expected, node.script_lines)

  # Whole program.
  expected =
    "1.times do\n" +
    " 2.times do\n" +
    " end\n" +
    "end"
  assert_equal(expected, node.source)

  # Outer do...end block.
  expected =
    "do\n" +
    " 2.times do\n" +
    " end\n" +
    "end"
  assert_equal(expected, node.children.last.children.last.source)

  # Inner 2.times call with its block.
  expected =
    "2.times do\n" +
    " end"
  assert_equal(expected, node.children.last.children.last.children.last.source)
end
def test_save_script_lines_for_of
# Checks that of(..., save_script_lines: true) returns nodes whose #source
# reproduces the text of a proc and of this very method.
#
# The expectations below mirror this method's own source text: keep the
# proc literal and the `def` line exactly as written.
proc = Proc.new { 1 + 2 }
method = self.method(__method__)
node_proc = RubyVM::AbstractSyntaxTree.of(proc, save_script_lines: true)
node_method = RubyVM::AbstractSyntaxTree.of(method, save_script_lines: true)
# The proc's fragment is the block literal, braces included.
assert_equal("{ 1 + 2 }", node_proc.source)
# Only the first line of the method's fragment is compared.
assert_equal("def test_save_script_lines_for_of\n", node_method.source.lines.first)
end
end