1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/lib/rdoc/markup/attribute_manager.rb
nagachika 2a70d21484 merge revision(s) b1c73f239fe9af97de837331849f55d67c27561e,bb570ce6d80d28cfc7131dcb72885eed2f989b30,b88d1e6b44164bca0c2b85ea6639469813e1e1d8,127f735c1e5e0771076caf2a74390757a42fb177,f3f1a666c77f8c528b0adc9ccf78e4b9910bd6e0,fa048a0f8523cefde5428805dd334691486319e6,d5d1c41728d65acfb8aa2cf95f2d8ac88f271cd1,52ebaf718e6a78297ceb0dff49815eeed28eae45,7fe22152fc28084f4395fece84ff6e5eb2d6b288,19e6d271266eca5925e66fc8ec39b251a2fa6bcd,05898c5b9001c0b1e8bd7bf0d12b42a8e7c388b8,3651f678a719ae3a35825bcb4e0dabbc7c60d8df,10b082064e6ab0943cce4ef43e567d8044c7096d,de8e6218a3257fe19b46ff0aa157e66f452ac8b7,971a0cd246db6578e1ea8760a903e1a23e3681f3,61a29a41e1d0d11a9963315aa86d25ed690124c0,ff9a00887161840eb8a34d53749a7d7962181cfe,fad3412d475b57055f426cf4e86eafeab516672b,54aa11efa8b1be2c5d20402890d6d2fa90aa19a8,e84d275fe6d0c14ba58ce73b13323879c060b7ae,61e1cf23ac0d122fba3ad4cbaa402c7c94ad54d3,a6948329f8f89fb390215086fe4a888915fd589e,64b991b0cd98ee8f23266b8cbea0fa34bdaab1ec,b1c73f239fe9af97de837331849f55d67c27561e,0ee24b4fab1a1faef600a42c29863e1c3edd8c61: [Backport #17877]
Synchronize rdoc with upstream version v6.3.1.

	[ruby/rdoc] Use File.open to fix the OS Command Injection
	 vulnerability in CVE-2021-31799

	a7f5d6ab88
	---
	 lib/rdoc/rdoc.rb            |  2 +-
	 test/rdoc/test_rdoc_rdoc.rb | 12 ++++++++++++
	 2 files changed, 13 insertions(+), 1 deletion(-)

	[ruby/rdoc] Support ChangeLog generated by `git log`

	5e0a123ca1
	---
	 lib/rdoc/parser/changelog.rb            | 35 +++++++++++++++++++++
	 test/rdoc/test_rdoc_parser_changelog.rb | 56 +++++++++++++++++++++++++++++++++
	 2 files changed, 91 insertions(+)

	[ruby/rdoc] Make each commit entries h3

	11eefb2ae9
	---
	 lib/rdoc/parser/changelog.rb            | 12 ++++++------
	 test/rdoc/test_rdoc_parser_changelog.rb | 12 ++++--------
	 2 files changed, 10 insertions(+), 14 deletions(-)

	[ruby/rdoc] Tweak log entry markdown

	* add 3 levels to headings
	* prefix commit log to labels to make unique IDs

	5074c13209
	---
	 lib/rdoc/parser/changelog.rb            | 20 +++++++++++++++++-
	 test/rdoc/test_rdoc_parser_changelog.rb | 36 ++++++++++++++++++++++++++++++++-
	 2 files changed, 54 insertions(+), 2 deletions(-)

	[ruby/rdoc] Shorten commit hashes

	5d3e153963
	---
	 lib/rdoc/parser/changelog.rb            | 2 +-
	 test/rdoc/test_rdoc_parser_changelog.rb | 2 +-
	 2 files changed, 2 insertions(+), 2 deletions(-)

	[ruby/rdoc] Add links to the commits

	1821628076
	---
	 lib/rdoc/parser/changelog.rb            | 107 +++++++++++++++++++++++++-------
	 test/rdoc/test_rdoc_parser_changelog.rb |  36 ++++++-----
	 2 files changed, 107 insertions(+), 36 deletions(-)

	[ruby/rdoc] Sort by CommitDate if available

	455715e930
	---
	 lib/rdoc/parser/changelog.rb            | 11 ++++++---
	 test/rdoc/test_rdoc_parser_changelog.rb | 40 +++++++++++++++++++++++++++++++++
	 2 files changed, 48 insertions(+), 3 deletions(-)

	[ruby/rdoc] Skip non-date logs by git-log

	`RDoc::Parser::ChangeLog` mis-parses ChangeLog generated by
	git-log, because of too heuristic `Time.parse`.

	For instance, "commit 8187228de0"
	results in "8187-08-16", that is, day 228 in the year 8187.

	9711e6f6d9
	---
	 lib/rdoc/parser/changelog.rb            | 36 ++++++++++++++++++++-------------
	 test/rdoc/test_rdoc_parser_changelog.rb |  2 ++
	 2 files changed, 24 insertions(+), 14 deletions(-)

	[ruby/rdoc] Support other date formats in git-log

	ad8cf37d72
	---
	 lib/rdoc/parser/changelog.rb            |  6 +++---
	 test/rdoc/test_rdoc_parser_changelog.rb | 22 ++++++++++++++++++++++
	 2 files changed, 25 insertions(+), 3 deletions(-)

	[ruby/rdoc] Support iso-strict format in git-log

	2a6c22da63
	---
	 lib/rdoc/parser/changelog.rb            |  6 +++---
	 test/rdoc/test_rdoc_parser_changelog.rb | 11 +++++++++++
	 2 files changed, 14 insertions(+), 3 deletions(-)

	[ruby/rdoc] Update Rdoc.css sidebar panel.

	Updates css so the sidebar look like a panel instead of looking like chopped edges.

	b0098c6d72
	---
	 lib/rdoc/generator/template/darkfish/css/rdoc.css | 1 +
	 1 file changed, 1 insertion(+)

	[ruby/rdoc] Support GFM table

	9dc933df16
	---
	 lib/rdoc/markdown.rb                    | 349 +++++++++++++++++++++++++++++++-
	 lib/rdoc/markup.rb                      |   1 +
	 lib/rdoc/markup/table.rb                |  47 +++++
	 lib/rdoc/markup/to_html.rb              |  23 +++
	 lib/rdoc/markup/to_joined_paragraph.rb  |   1 +
	 lib/rdoc/markup/to_rdoc.rb              |  28 +++
	 lib/rdoc/markup/to_table_of_contents.rb |   1 +
	 test/rdoc/test_rdoc_markdown.rb         |  23 +++
	 8 files changed, 471 insertions(+), 2 deletions(-)
	 create mode 100644 lib/rdoc/markup/table.rb

	[ruby/rdoc] Add table style

	2219c5ae80
	---
	 lib/rdoc/generator/template/darkfish/css/rdoc.css | 19 +++++++++++++++++++
	 1 file changed, 19 insertions(+)

	[ruby/rdoc] Fixed CodeFence without blank lines

	Currently a fenced code block needs a preceding blank line, it
	should not be required, as:
	https://github.github.com/gfm/#fenced-code-blocks
	> A fenced code block may interrupt a paragraph, and does not
	> require a blank line either before or after.

	Just recommended:
	https://docs.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks
	> We recommend placing a blank line before and after code blocks
	> to make the raw formatting easier to read.

	0e1776caf3
	---
	 lib/rdoc/markdown.rb            | 447 +++++++---------------------------------
	 test/rdoc/test_rdoc_markdown.rb |  36 +++-
	 2 files changed, 112 insertions(+), 371 deletions(-)

	[ruby/rdoc] Allow partial default values to be overridden with
	 .rdoc_options

	e14800891f
	---
	 lib/rdoc/options.rb         | 34 +++++++++++++++++++++++++++++++++-
	 lib/rdoc/rdoc.rb            |  7 ++++++-
	 test/rdoc/test_rdoc_rdoc.rb | 12 ++++++++++++
	 3 files changed, 51 insertions(+), 2 deletions(-)

	[ruby/rdoc] Allow empty .rdoc_options

	0c8cb25b50
	---
	 lib/rdoc/rdoc.rb            |  2 ++
	 test/rdoc/test_rdoc_rdoc.rb | 11 +++++++++++
	 2 files changed, 13 insertions(+)

	[ruby/rdoc] Suppress unused variable warning of "text"

	3a4120b155
	---
	 lib/rdoc/markdown.rb | 345 ++++++++++++++++++++++++++++++++++++++++++++++++++-
	 1 file changed, 343 insertions(+), 2 deletions(-)

	[ruby/rdoc] Get rid of a trailing space

	7b7b91768e
	---
	 lib/rdoc/markdown.rb | 7 +++----
	 1 file changed, 3 insertions(+), 4 deletions(-)

	[ruby/rdoc] Disable other notations in <code> tags

	0cd3b55210
	---
	 lib/rdoc/markup/attr_span.rb                    |  10 ++-
	 lib/rdoc/markup/attribute_manager.rb            | 102 ++++++++++++++++++------
	 test/rdoc/test_rdoc_markup_attribute_manager.rb |  24 +++++-
	 test/rdoc/test_rdoc_markup_to_html.rb           |   6 ++
	 4 files changed, 114 insertions(+), 28 deletions(-)

	[ruby/rdoc] Treat other tags as word boundaries

	8222f85a17
	---
	 lib/rdoc/markup/attribute_manager.rb  | 17 +++++++++++++----
	 test/rdoc/test_rdoc_markup_to_html.rb |  5 +++++
	 2 files changed, 18 insertions(+), 4 deletions(-)

	[ruby/rdoc] Treat emphasis tags as excluding other notations

	And exclusive notations don't exclude other exclusive notations.

	b8baa9a435
	---
	 lib/rdoc/markup/attr_span.rb                    |  2 +-
	 lib/rdoc/markup/attribute_manager.rb            | 10 +++++-----
	 test/rdoc/test_rdoc_markup_attribute_manager.rb |  1 +
	 3 files changed, 7 insertions(+), 6 deletions(-)

	[ruby/rdoc] Clarify that dots in URL are replaced

	The dots in all path components from the document root are
	replaced with underscores, not only in the basename.

	7a3417ea4c
	---
	 test/rdoc/test_rdoc_top_level.rb | 3 +++
	 1 file changed, 3 insertions(+)

	[ruby/rdoc] Links to document texts without "rdoc-ref:" prefix

	While links to generated HTML from RDoc file needs to be prefixed
	by "rdoc-ref:" currently, in case of explicit references this
	seems just redundant.

	Also GitHub RDoc support does not work with this prefix.

	This patch lets links to such document texts (".rb", ".rdoc" and
	".md" now) refer URLs generated by `RDoc::TopLevel#http_url`
	without the prefix.

	f18b27b69d
	---
	 lib/rdoc/markup/to_html.rb            |  4 ++++
	 test/rdoc/test_rdoc_markup_to_html.rb | 21 +++++++++++++++++++++
	 2 files changed, 25 insertions(+)

	[ruby/rdoc] Use File.open to fix the OS Command Injection
	 vulnerability in CVE-2021-31799

	a7f5d6ab88
	---
	 lib/rdoc/rdoc.rb            |  2 +-
	 test/rdoc/test_rdoc_rdoc.rb | 12 ++++++++++++
	 2 files changed, 13 insertions(+), 1 deletion(-)

	[ruby/rdoc] Version 6.3.1

	9307f932b7
	---
	 lib/rdoc/version.rb | 2 +-
	 1 file changed, 1 insertion(+), 1 deletion(-)
2021-05-22 14:51:55 +09:00

409 lines
10 KiB
Ruby

# frozen_string_literal: true
##
# Manages changes of attributes in a block of text
class RDoc::Markup::AttributeManager
##
# The NUL character.  Markup delimiters that have been consumed are
# overwritten with this character so the string keeps its length, and
# #copy_string strips it again when text is extracted.
NULL = "\000".freeze
#--
# We work by substituting non-printing characters in to the text. For now
# I'm assuming that I can substitute a character in the range 0..8 for a 7
# bit character without damaging the encoded string, but this might be
# optimistic
#++
A_PROTECT = 004 # :nodoc:
##
# Special mask character to prevent inline markup handling.  It is the
# character whose code is A_PROTECT.
PROTECT_ATTR = A_PROTECT.chr # :nodoc:
##
# The attributes enabled for this markup object.
attr_reader :attributes
##
# This maps delimiters that occur around words (such as *bold* or +tt+)
# where the start and end delimiters are the same. This lets us optimize
# the regexp.  Keys are the delimiter strings, values are attribute bitmaps.
attr_reader :matching_word_pairs
##
# And this is used when the delimiters aren't the same. In this case the
# hash maps a Regexp (start, word, stop as three groups) to the attribute
# bitmap.
attr_reader :word_pair_map
##
# This maps HTML tag names (stored downcased) to the corresponding
# attribute bitmap.
attr_reader :html_tags
##
# The characters that can be escaped.  A \ in front of a character that
# would normally be processed turns off processing. We do this by turning
# \< into <#{PROTECT_ATTR}
attr_reader :protectable
##
# And this holds the _regexp handling_ sequences as [pattern, bitmap]
# pairs. A regexp handling sequence is something like a WikiWord
attr_reader :regexp_handlings
##
# The combined bitmap of every attribute that was registered as exclusive.
attr_reader :exclusive_bitmap
##
# Creates a new attribute manager that understands bold, emphasized and
# teletype text, both as word pairs (<tt>*</tt>, <tt>_</tt>, <tt>+</tt>) and
# as the HTML tags +em+, +i+, +b+, +tt+ and +code+.  Every default is
# registered as exclusive.
def initialize
  @html_tags           = {}
  @matching_word_pairs = {}
  @protectable         = %w[<]
  @regexp_handlings    = []
  @word_pair_map       = {}
  @exclusive_bitmap    = 0
  @attributes          = RDoc::Markup::Attributes.new

  # The registration order (BOLD, EM, TT) is kept exactly as before;
  # presumably the attribute bitmaps depend on it -- TODO confirm against
  # RDoc::Markup::Attributes#bitmap_for.
  [["*", :BOLD], ["_", :EM], ["+", :TT]].each do |delimiter, name|
    add_word_pair delimiter, delimiter, name, true
  end

  [["em", :EM], ["i", :EM], ["b", :BOLD], ["tt", :TT], ["code", :TT]].each do |tag, name|
    add_html tag, name, true
  end
end
##
# Builds the RDoc::Markup::AttrChanger for one attribute transition:
# +turn_on+ holds the bits to switch on, +turn_off+ the bits to switch off.
def attribute(turn_on, turn_off)
  RDoc::Markup::AttrChanger.new(turn_on, turn_off)
end
##
# Describes the switch from the attribute bitmap +current+ to the bitmap
# +new+.  Bits present only in +new+ are turned on, bits present only in
# +current+ are turned off; the result comes from #attribute.
def change_attribute(current, new)
  changed    = current ^ new      # bits that differ between the two bitmaps
  turned_on  = new & changed
  turned_off = current & changed
  attribute(turned_on, turned_off)
end
##
# Used by the tests to change attributes by name from +current_set+ to
# +new_set+.  Each set is a list of attribute names; the names are turned
# into bitmaps through the attribute registry, OR-ed together, and the two
# resulting bitmaps are handed to #change_attribute.
def changed_attribute_by_name(current_set, new_set)
  from, to = [current_set, new_set].map do |names|
    names.inject(0) { |bits, name| bits | @attributes.bitmap_for(name) }
  end

  change_attribute(from, to)
end
##
# Returns the text of the current string from +start_pos+ up to, but not
# including, +end_pos+, with every NUL placeholder character removed.  The
# current string itself is left untouched.
def copy_string(start_pos, end_pos)
  @str[start_pos...end_pos].gsub(/\000/, '')
end
##
# Returns +true+ when the bitmap +attr+ shares at least one bit with the
# exclusive bitmap, +false+ otherwise.
def exclusive?(attr)
  !(attr & @exclusive_bitmap).zero?
end
# Temporary markers used while converting matching word pairs: a delimiter
# whose attribute could not be applied is wrapped in these two characters,
# and both characters are deleted from the string once conversion is done.
NON_PRINTING_START = "\1" # :nodoc:
NON_PRINTING_END = "\2" # :nodoc:
##
# Converts word-pair markup in +str+ (for example <tt>*bold*</tt>) into
# attributes recorded in +attrs+.
#
# Pairs whose start and stop delimiters are identical are converted first,
# then pairs with differing delimiters.  +str+, +attrs+ and +exclusive+ are
# passed through unchanged to both steps; +exclusive+ selects whether the
# exclusive or the non-exclusive attributes are processed.
def convert_attrs(str, attrs, exclusive = false)
convert_attrs_matching_word_pairs(str, attrs, exclusive)
convert_attrs_word_pair_map(str, attrs, exclusive)
end
##
# Converts word pairs whose start and stop delimiter are the same string
# (for example <tt>*bold*</tt>).
#
# Only delimiters whose attribute is exclusive are handled when +exclusive+
# is true, and only non-exclusive ones otherwise.  For every match the
# attribute is recorded in +attrs+ for the enclosed word.  When that
# succeeds the two delimiters in +str+ are overwritten with NUL characters;
# when it is refused the delimiters are kept in the text.
#
# +str+ is modified in place.  Returns +nil+ when no delimiter qualifies.
def convert_attrs_matching_word_pairs(str, attrs, exclusive)
  wanted = exclusive ? true : false
  tags = @matching_word_pairs.select { |_delimiter, bitmap|
    exclusive?(bitmap) == wanted
  }.keys
  return if tags.empty?

  # The delimiters end up inside regexp character classes, so they must be
  # escaped: an unescaped "^", "-", "]" or "\" would change the meaning of
  # the class instead of being matched literally.
  tag_class = Regexp.escape(tags.join)
  all_class = Regexp.escape(@matching_word_pairs.keys.join)

  # 1: what precedes the markup (line start, non-word char or any delimiter)
  # 2: the opening delimiter
  # 3: the marked-up word
  # 4: what follows the closing delimiter
  re = /(^|\W|[#{all_class}])([#{tag_class}])(\2*[#\\]?[\w:.\/\[\]-]+?\S?)\2(?!\2)([#{all_class}]|\W|$)/

  # Repeat until a pass finds nothing, so nested pairs are converted too.
  1 while str.gsub!(re) {
    m = Regexp.last_match
    lead, delimiter, word, trail = m[1], m[2], m[3], m[4]
    attr = @matching_word_pairs[delimiter]

    if attrs.set_attrs(m.begin(3), word.length, attr)
      blank = NULL * delimiter.length
      lead + blank + word + blank + trail
    else
      # Shield the refused delimiters so the next pass does not match them
      # again; the shields are removed below.
      shielded = NON_PRINTING_START + delimiter + NON_PRINTING_END
      lead + shielded + word + shielded + trail
    end
  }
  str.delete!(NON_PRINTING_START + NON_PRINTING_END)
end
##
# Converts word pairs whose start and stop delimiters differ.
#
# Each entry of the word pair map is a regexp with three groups (start
# delimiter, word, stop delimiter) and an attribute bitmap.  Only entries
# whose attribute is exclusive are handled when +exclusive+ is true, and
# only non-exclusive ones otherwise.  When the attribute can be recorded in
# +attrs+ the delimiters in +str+ are overwritten with NUL characters;
# otherwise the matched text is left as it is.
#
# +str+ is modified in place.
def convert_attrs_word_pair_map(str, attrs, exclusive)
  return if @word_pair_map.empty?

  wanted = exclusive ? true : false
  @word_pair_map.each do |regexp, attr|
    next unless exclusive?(attr) == wanted

    # Keep scanning while a pass applied at least one attribute.  The result
    # of gsub! cannot drive the loop: it reports a substitution even when
    # the block hands back the matched text unchanged, so a refused match
    # would be retried forever.
    loop do
      applied = false
      str.gsub!(regexp) { |orig|
        m = Regexp.last_match
        if attrs.set_attrs(m.pre_match.length + m[1].length, m[2].length, attr)
          applied = true
          NULL * m[1].length + m[2] + NULL * m[3].length
        else
          orig
        end
      }
      break unless applied
    end
  end
end
##
# Converts HTML tags to RDoc attributes.
#
# Only tags whose attribute is exclusive are handled when +exclusive+ is
# true, and only non-exclusive ones otherwise.  Tag names are matched case
# insensitively.  For every <tt><tag>text</tag></tt> found in +str+ the
# attribute is recorded in +attrs+ for the enclosed text and both tags are
# overwritten with NUL characters, so the length of +str+ does not change.
#
# +str+ is modified in place.  Returns +nil+.
def convert_html(str, attrs, exclusive = false)
  wanted = exclusive ? true : false
  names = @html_tags.select { |_tag, bitmap|
    exclusive?(bitmap) == wanted
  }.keys

  # Without this guard the pattern would contain an empty alternation and
  # match "<>text</>", looking up an attribute for the tag name "".
  return if names.empty?

  alternation = names.map { |name| Regexp.escape(name) }.join('|')

  1 while str.gsub!(/<(#{alternation})>(.*?)<\/\1>/i) {
    m = Regexp.last_match
    tag, content = m[1], m[2]
    open_length = tag.length + 2 # "<" + tag + ">"

    attrs.set_attrs(m.pre_match.length + open_length, content.length,
                    @html_tags[tag.downcase])

    # The closing tag is one character ("/") longer than the opening tag.
    blank = NULL * open_length
    blank + content + blank + NULL
  }
end
##
# Converts regexp handling sequences to RDoc attributes.
#
# Every registered pattern whose attribute matches the requested
# exclusivity is scanned over +str+.  For each match the handling's bitmap,
# combined with the regexp-handling flag, is recorded in +attrs+.  The
# recorded range is the first capture group when the pattern has groups and
# the whole match otherwise.  +str+ is not modified.
def convert_regexp_handlings(str, attrs, exclusive = false)
  wanted = exclusive ? true : false

  @regexp_handlings.each do |regexp, attribute|
    next unless exclusive?(attribute) == wanted

    str.scan(regexp) do
      match = Regexp.last_match
      group = match.size == 1 ? 0 : 1
      first, last = match.offset(group)
      attrs.set_attrs(first, last - first, attribute | @attributes.regexp_handling)
    end
  end
end
##
# Masks sequences in the current string that must not be treated as markup.
#
# * Double-underscore names such as <tt>__send__</tt> get the protect
#   character inserted after every underscore.
# * A single backslash in front of a protectable character is dropped and
#   the protect character is inserted after the protectable character.
# * A double backslash in front of a protectable character is reduced to a
#   single backslash.
def mask_protected_sequences
  guard = PROTECT_ATTR

  # protect __send__, __FILE__, etc.
  @str.gsub!(/__([a-z]+)__/i, "_#{guard}_#{guard}\\1_#{guard}_#{guard}")

  escapable = Regexp.escape(@protectable.join)
  @str.gsub!(/(\A|[^\\])\\([#{escapable}])/m, "\\1\\2#{guard}")
  @str.gsub!(/\\(\\[#{escapable}])/m, "\\1")
end
##
# Replaces every protect character that directly follows another character
# in the current string with a NUL character.
def unmask_protected_sequences
  @str.gsub!(/(.)#{PROTECT_ATTR}/) { "#{Regexp.last_match(1)}\000" }
end
##
# Adds a markup class with +name+ for words wrapped in the +start+ and
# +stop+ character. To make words wrapped with "*" bold:
#
# am.add_word_pair '*', '*', :BOLD
#
# Identical delimiters are stored in the matching word pair table, differing
# ones as a regexp in the word pair map.  The first character of +start+
# becomes escapable with a backslash.  When +exclusive+ is true the
# attribute is added to the exclusive bitmap.
#
# Raises ArgumentError when +start+ begins with "<".
def add_word_pair(start, stop, name, exclusive = false)
  first_char = start[0, 1]
  raise ArgumentError, "Word flags may not start with '<'" if first_char == '<'

  bitmap = @attributes.bitmap_for(name)

  if start != stop
    @word_pair_map[/(#{Regexp.escape start})(\S+)(#{Regexp.escape stop})/] = bitmap
  else
    @matching_word_pairs[start] = bitmap
  end

  @protectable.push(first_char).uniq!

  @exclusive_bitmap |= bitmap if exclusive
end
##
# Adds a markup class with +name+ for words surrounded by HTML tag +tag+.
# To process emphasis tags:
#
# am.add_html 'em', :EM
#
# The tag name is stored downcased.  When +exclusive+ is true the attribute
# is added to the exclusive bitmap.
def add_html(tag, name, exclusive = false)
  bits = @attributes.bitmap_for(name)
  @html_tags[tag.downcase] = bits
  return unless exclusive

  @exclusive_bitmap |= bits
end
##
# Adds a regexp handling for +pattern+ with +name+. A simple URL handler
# would be:
#
# @am.add_regexp_handling(/((https?:)\S+\w)/, :HYPERLINK)
#
# The pattern is stored together with the bitmap for +name+.  When
# +exclusive+ is true the attribute is added to the exclusive bitmap.
def add_regexp_handling(pattern, name, exclusive = false)
  bits = @attributes.bitmap_for(name)
  @regexp_handlings.push([pattern, bits])
  return unless exclusive

  @exclusive_bitmap |= bits
end
##
# Processes +str+ converting attributes, HTML and regexp handlings.
#
# Works on a copy of +str+.  Protected sequences are masked first, then word
# pairs, HTML tags and regexp handlings are converted in two passes:
# exclusive attributes first, non-exclusive ones second.  Finally the masks
# are removed and the result of #split_into_flow is returned.
def flow(str)
  @str = str.dup

  mask_protected_sequences

  @attrs = RDoc::Markup::AttrSpan.new(@str.length, @exclusive_bitmap)

  [true, false].each do |exclusive|
    convert_attrs @str, @attrs, exclusive
    convert_html @str, @attrs, exclusive
    convert_regexp_handlings @str, @attrs, exclusive
  end

  unmask_protected_sequences

  split_into_flow
end
##
# Debug method that prints a string along with its attributes.
#
# Prints an empty line, then the current string with NUL characters shown
# as "!", then one line for each of the 16 lowest attribute bits that is set
# on at least one character.  In such a line every character position
# carrying the bit is marked: "S" for the lowest bit, otherwise the
# 1-based bit number.
def display_attributes
  puts
  puts @str.tr(NULL, "!")

  16.times do |bno|
    bit = 1 << bno
    marker = bno.zero? ? "S" : (bno + 1).to_s

    # Build the line from its pieces rather than appending to a string
    # literal: literals are frozen in this file, so appending would raise.
    line = Array.new(@str.length) { |i|
      (@attrs[i] & bit) == 0 ? " " : marker
    }.join

    puts(line) unless line.strip.empty?
  end
end
##
# Splits the string into chunks by attribute change
#
# Walks the current string together with the per-character attribute values
# in @attrs and returns an Array that interleaves:
#
# * Strings of text (extracted with #copy_string, so without NUL
#   placeholders),
# * the objects returned by #change_attribute wherever the attribute value
#   changes, and
# * RDoc::Markup::RegexpHandling objects for runs of characters carrying the
#   regexp-handling bit.
#
# If any attribute is still on at the end, a final change back to 0 is
# appended.
def split_into_flow
res = []
current_attr = 0
str_len = @str.length
# skip leading invisible text
i = 0
i += 1 while i < str_len and @str[i].chr == "\0"
# start_pos marks the beginning of the text that has not been emitted yet
start_pos = i
# then scan the string, chunking it on attribute changes
while i < str_len
new_attr = @attrs[i]
if new_attr != current_attr
# emit the text collected under the previous attribute value, if any
if i > start_pos
res << copy_string(start_pos, i)
start_pos = i
end
res << change_attribute(current_attr, new_attr)
current_attr = new_attr
# a regexp handling run is consumed as a whole and emitted as one object
if (current_attr & @attributes.regexp_handling) != 0 then
i += 1 while
i < str_len and (@attrs[i] & @attributes.regexp_handling) != 0
res << RDoc::Markup::RegexpHandling.new(current_attr,
copy_string(start_pos, i))
start_pos = i
# re-examine the character at the new position without advancing
next
end
end
# move on, skipping any invisible characters
begin
i += 1
end while i < str_len and @str[i].chr == "\0"
end
# tidy up trailing text
if start_pos < str_len
res << copy_string(start_pos, str_len)
end
# and reset to all attributes off
res << change_attribute(current_attr, 0) if current_attr != 0
res
end
end