Refactor and improve performance of RDoc::Markup::Parser

This change introduces a wrapper around StringScanner that is aware of the current position (column and line number). It has two advantages: it is faster and more modular. The old code frequently ran `@input.byteslice(0, byte_offset).length` to get the current position, which was painfully slow. This change keeps track of the position at each scan, which roughly halves the time of the "Generating RI format into ..." step in Ruby's `make rdoc` (5.5 sec -> 3.0 sec). In addition, the old code used four instance variables (`@input`, `@line`, `@line_pos`, and `@s`) to track the position. This change factors them out into MyStringScanner, so now only one variable (`@s`) is needed.
2022-11-09 12:17:21 -05:00 · 2019-08-07 01:53:56 +09:00 · 2019-08-07 01:53:56 +09:00 · 0a0760aa63
commit 0a0760aa63
parent 9d2fed2ccd
3 changed files with 65 additions and 67 deletions
--- a/lib/rdoc/tom_doc.rb
+++ b/lib/rdoc/tom_doc.rb
@@ -242,19 +242,18 @@ class RDoc::TomDoc < RDoc::Markup::Parser

      @tokens << case
                 when @s.scan(/\r?\n/) then
-                   token = [:NEWLINE, @s.matched, *token_pos(pos)]
-                   @line_pos = char_pos @s.pos
-                   @line += 1
+                   token = [:NEWLINE, @s.matched, *pos]
+                   @s.newline!
                   token
                 when @s.scan(/(Examples|Signature)$/) then
-                   @tokens << [:HEADER, 3, *token_pos(pos)]
+                   @tokens << [:HEADER, 3, *pos]

-                   [:TEXT, @s[1], *token_pos(pos)]
+                   [:TEXT, @s[1], *pos]
                 when @s.scan(/([:\w][\w\[\]]*)[ ]+- /) then
-                   [:NOTE, @s[1], *token_pos(pos)]
+                   [:NOTE, @s[1], *pos]
                 else
                   @s.scan(/.*/)
-                   [:TEXT, @s.matched.sub(/\r$/, ''), *token_pos(pos)]
+                   [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
                 end
    end