mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
5397dd2e76
Protected characters with `PROTECT_ATTR` should not have special roles. https://github.com/ruby/rdoc/commit/c318af0ea2
390 lines
9.8 KiB
Ruby
390 lines
9.8 KiB
Ruby
# frozen_string_literal: true
|
|
##
# Manages changes of attributes in a block of text.
#
# Inline markup (word pairs such as <tt>*bold*</tt>, HTML-like tags such as
# <tt><b>bold</b></tt> and registered regexp handlings) is located in a copy
# of the input string.  The markup delimiters are overwritten with NUL
# characters, the attribute bits that apply to each character are recorded
# in an RDoc::Markup::AttrSpan, and #flow finally splits the string into
# text chunks separated by attribute change objects.

class RDoc::Markup::AttributeManager

  ##
  # The NUL character

  NULL = "\000".freeze

  #--
  # We work by substituting non-printing characters in to the text. For now
  # I'm assuming that I can substitute a character in the range 0..8 for a 7
  # bit character without damaging the encoded string, but this might be
  # optimistic
  #++

  A_PROTECT = 004 # :nodoc:

  ##
  # Special mask character to prevent inline markup handling

  PROTECT_ATTR = A_PROTECT.chr # :nodoc:

  ##
  # The attributes enabled for this markup object.

  attr_reader :attributes

  ##
  # This maps delimiters that occur around words (such as *bold* or +tt+)
  # where the start and end delimiters are the same. This lets us optimize
  # the regexp

  attr_reader :matching_word_pairs

  ##
  # And this is used when the delimiters aren't the same. In this case the
  # hash maps a pattern to the attribute character

  attr_reader :word_pair_map

  ##
  # This maps HTML tags to the corresponding attribute char

  attr_reader :html_tags

  ##
  # A \ in front of a character that would normally be processed turns off
  # processing.  We do this by turning \< into <#{PROTECT_ATTR}

  attr_reader :protectable

  ##
  # And this maps _regexp handling_ sequences to a name. A regexp handling
  # sequence is something like a WikiWord

  attr_reader :regexp_handlings

  ##
  # A bitmap of the attributes that were registered as exclusive

  attr_reader :exclusive_bitmap

  ##
  # Creates a new attribute manager that understands bold, emphasized and
  # teletype text.

  def initialize
    @html_tags = {}
    @matching_word_pairs = {}
    @protectable = %w[<]
    @regexp_handlings = []
    @word_pair_map = {}
    @exclusive_bitmap = 0
    @attributes = RDoc::Markup::Attributes.new

    add_word_pair "*", "*", :BOLD, true
    add_word_pair "_", "_", :EM, true
    add_word_pair "+", "+", :TT, true

    add_html "em",   :EM, true
    add_html "i",    :EM, true
    add_html "b",    :BOLD, true
    add_html "tt",   :TT, true
    add_html "code", :TT, true
  end

  ##
  # Return an attribute object with the given turn_on and turn_off bits set

  def attribute(turn_on, turn_off)
    RDoc::Markup::AttrChanger.new turn_on, turn_off
  end

  ##
  # Changes the current attribute from +current+ to +new+.
  #
  # Only the bits that differ between the two bitmaps end up in the returned
  # attribute object: bits set in +new+ only are turned on, bits set in
  # +current+ only are turned off.

  def change_attribute current, new
    diff = current ^ new
    attribute(new & diff, current & diff)
  end

  ##
  # Used by the tests to change attributes by name from +current_set+ to
  # +new_set+

  def changed_attribute_by_name current_set, new_set
    current = new = 0

    current_set.each do |name|
      current |= @attributes.bitmap_for(name)
    end

    new_set.each do |name|
      new |= @attributes.bitmap_for(name)
    end

    change_attribute(current, new)
  end

  ##
  # Copies +start_pos+ to +end_pos+ (exclusive) from the current string,
  # dropping the NUL characters that replaced markup delimiters

  def copy_string(start_pos, end_pos)
    @str[start_pos...end_pos].delete(NULL)
  end

  ##
  # Returns true when at least one bit of +attr+ belongs to an attribute
  # registered as exclusive

  def exclusive?(attr)
    (attr & @exclusive_bitmap) != 0
  end

  NON_PRINTING_START = "\1" # :nodoc:
  NON_PRINTING_END = "\2" # :nodoc:

  ##
  # Converts word-pair markup (such as <tt>*bold*</tt>) in +str+ into
  # attribute bits recorded in +attrs+, overwriting the delimiters in +str+
  # with NUL characters.
  #
  # Only the word pairs whose exclusivity equals +exclusive+ are handled.

  def convert_attrs(str, attrs, exclusive = false)
    convert_attrs_matching_word_pairs(str, attrs, exclusive)
    convert_attrs_word_pair_map(str, attrs, exclusive)
  end

  ##
  # Handles the word pairs whose start and stop delimiters are identical.
  #
  # A delimiter directly followed by PROTECT_ATTR is never treated as markup.

  def convert_attrs_matching_word_pairs(str, attrs, exclusive)
    # first do matching ones
    tags = @matching_word_pairs.select { |start, bitmap|
      exclusive == exclusive?(bitmap)
    }.keys
    return if tags.empty?

    # The delimiters are placed inside a character class, so they are
    # escaped to keep characters such as "]", "^" or "\" literal.
    char_class = lambda { |delimiters|
      "[#{delimiters.map { |d| Regexp.escape d }.join}](?!#{PROTECT_ATTR})"
    }
    tags = char_class.call tags
    all_tags = char_class.call @matching_word_pairs.keys

    re = /(^|\W|#{all_tags})(#{tags})(\2*[#\\]?[\w:#{PROTECT_ATTR}.\/\[\]-]+?\S?)\2(?!\2)(#{all_tags}|\W|$)/

    1 while str.gsub!(re) {
      attr = @matching_word_pairs[$2]
      attr_updated = attrs.set_attrs($`.length + $1.length + $2.length, $3.length, attr)
      if attr_updated
        $1 + NULL * $2.length + $3 + NULL * $2.length + $4
      else
        # The attribute was not applied: wrap the delimiters in non-printing
        # markers so this occurrence is not matched again by the loop.
        $1 + NON_PRINTING_START + $2 + NON_PRINTING_END + $3 + NON_PRINTING_START + $2 + NON_PRINTING_END + $4
      end
    }

    # remove the temporary markers again
    str.delete!(NON_PRINTING_START + NON_PRINTING_END)
  end

  ##
  # Handles the word pairs whose start and stop delimiters differ

  def convert_attrs_word_pair_map(str, attrs, exclusive)
    # then non-matching
    return if @word_pair_map.empty?

    @word_pair_map.each do |regexp, attr|
      next unless exclusive == exclusive?(attr)

      1 while str.gsub!(regexp) { |orig|
        updated = attrs.set_attrs($`.length + $1.length, $2.length, attr)
        if updated
          NULL * $1.length + $2 + NULL * $3.length
        else
          orig
        end
      }
    end
  end

  ##
  # Converts HTML tags to RDoc attributes.
  #
  # Only the tags whose exclusivity equals +exclusive+ are handled.  The
  # opening and closing tags are overwritten with NUL characters so that the
  # length of +str+ does not change.

  def convert_html(str, attrs, exclusive = false)
    tags = @html_tags.select { |start, bitmap|
      exclusive == exclusive?(bitmap)
    }.keys

    # Without this guard the alternation below would be empty and the
    # pattern would match "<>text</>", for which no attribute is registered.
    return if tags.empty?

    tags = tags.map { |tag| Regexp.escape tag }.join '|'

    1 while str.gsub!(/<(#{tags})>(.*?)<\/\1>/i) {
      attr = @html_tags[$1.downcase]
      html_length = $1.length + 2 # "<" + tag name + ">"
      seq = NULL * html_length
      attrs.set_attrs($`.length + html_length, $2.length, attr)
      # the closing tag is one character longer ("/") than the opening one
      seq + $2 + seq + NULL
    }
  end

  ##
  # Converts regexp handling sequences to RDoc attributes.
  #
  # For every match the first capture group is marked when the pattern has
  # captures, the whole match otherwise.

  def convert_regexp_handlings str, attrs, exclusive = false
    @regexp_handlings.each do |regexp, attribute|
      next unless exclusive == exclusive?(attribute)

      str.scan(regexp) do
        capture = $~.size == 1 ? 0 : 1

        s, e = $~.offset capture

        attrs.set_attrs s, e - s, attribute | @attributes.regexp_handling
      end
    end
  end

  ##
  # Escapes regexp handling sequences of text to prevent conversion to RDoc

  def mask_protected_sequences
    protectable = Regexp.escape @protectable.join

    # protect __send__, __FILE__, etc.
    @str.gsub!(/__([a-z]+)__/i,
               "_#{PROTECT_ATTR}_#{PROTECT_ATTR}\\1_#{PROTECT_ATTR}_#{PROTECT_ATTR}")
    # a backslash-escaped protectable character loses the backslash and is
    # followed by the mask character instead
    @str.gsub!(/(\A|[^\\])\\([#{protectable}])/m,
               "\\1\\2#{PROTECT_ATTR}")
    # a doubled backslash in front of a protectable character becomes single
    @str.gsub!(/\\(\\[#{protectable}])/m, "\\1")
  end

  ##
  # Unescapes regexp handling sequences of text

  def unmask_protected_sequences
    @str.gsub!(/(.)#{PROTECT_ATTR}/, "\\1\000")
  end

  ##
  # Adds a markup class with +name+ for words wrapped in the +start+ and
  # +stop+ character.  To make words wrapped with "*" bold:
  #
  #   am.add_word_pair '*', '*', :BOLD
  #
  # Raises ArgumentError when +start+ begins with "<".  When +exclusive+ is
  # true the attribute is added to the exclusive bitmap.

  def add_word_pair(start, stop, name, exclusive = false)
    raise ArgumentError, "Word flags may not start with '<'" if
      start[0, 1] == '<'

    bitmap = @attributes.bitmap_for name

    if start == stop
      @matching_word_pairs[start] = bitmap
    else
      pattern = /(#{Regexp.escape start})(\S+)(#{Regexp.escape stop})/
      @word_pair_map[pattern] = bitmap
    end

    @protectable << start[0, 1]
    @protectable.uniq!

    @exclusive_bitmap |= bitmap if exclusive
  end

  ##
  # Adds a markup class with +name+ for words surrounded by HTML tag +tag+.
  # To process emphasis tags:
  #
  #   am.add_html 'em', :EM
  #
  # The tag is stored downcased.  When +exclusive+ is true the attribute is
  # added to the exclusive bitmap.

  def add_html(tag, name, exclusive = false)
    bitmap = @attributes.bitmap_for name
    @html_tags[tag.downcase] = bitmap
    @exclusive_bitmap |= bitmap if exclusive
  end

  ##
  # Adds a regexp handling for +pattern+ with +name+.  A simple URL handler
  # would be:
  #
  #   @am.add_regexp_handling(/((https?:)\S+\w)/, :HYPERLINK)

  def add_regexp_handling pattern, name, exclusive = false
    bitmap = @attributes.bitmap_for(name)
    @regexp_handlings << [pattern, bitmap]
    @exclusive_bitmap |= bitmap if exclusive
  end

  ##
  # Processes +str+ converting attributes, HTML and regexp handlings.
  #
  # +str+ itself is not modified, a copy is processed.  Exclusive markup is
  # converted before non-exclusive markup.  Returns the result of
  # #split_into_flow.

  def flow str
    @str = str.dup

    mask_protected_sequences

    @attrs = RDoc::Markup::AttrSpan.new @str.length, @exclusive_bitmap

    convert_attrs            @str, @attrs, true
    convert_html             @str, @attrs, true
    convert_regexp_handlings @str, @attrs, true
    convert_attrs            @str, @attrs
    convert_html             @str, @attrs
    convert_regexp_handlings @str, @attrs

    unmask_protected_sequences

    split_into_flow
  end

  ##
  # Debug method that prints a string along with its attributes.
  #
  # The string is printed with NUL characters shown as "!", followed by one
  # line per attribute bit (lowest 16 bits) that is set anywhere in the
  # string.  Bit 0 is shown as "S", bit +n+ as the number <tt>n + 1</tt>.

  def display_attributes
    puts
    puts @str.tr(NULL, "!")

    16.times do |bno|
      bit = 1 << bno
      label = bno.zero? ? "S" : (bno + 1).to_s

      # Built with map/join rather than by appending to a string literal,
      # which is frozen under the frozen_string_literal magic comment.
      line = @str.length.times.map { |i|
        (@attrs[i] & bit) == 0 ? " " : label
      }.join

      puts(line) unless line =~ /^ *$/
    end
  end

  ##
  # Splits the string into chunks by attribute change.
  #
  # Returns an Array of Strings, attribute change objects (see #attribute)
  # and RDoc::Markup::RegexpHandling objects.

  def split_into_flow
    res = []
    current_attr = 0

    str_len = @str.length

    # skip leading invisible text
    i = 0
    i += 1 while i < str_len && @str[i] == NULL
    start_pos = i

    # then scan the string, chunking it on attribute changes
    while i < str_len
      new_attr = @attrs[i]
      if new_attr != current_attr
        if i > start_pos
          res << copy_string(start_pos, i)
          start_pos = i
        end

        res << change_attribute(current_attr, new_attr)
        current_attr = new_attr

        if (current_attr & @attributes.regexp_handling) != 0
          # consume the whole run carrying the regexp handling bit at once
          i += 1 while i < str_len && (@attrs[i] & @attributes.regexp_handling) != 0

          res << RDoc::Markup::RegexpHandling.new(current_attr,
                                                  copy_string(start_pos, i))
          start_pos = i
          next
        end
      end

      # move on, skipping any invisible characters
      i += 1
      i += 1 while i < str_len && @str[i] == NULL
    end

    # tidy up trailing text
    if start_pos < str_len
      res << copy_string(start_pos, str_len)
    end

    # and reset to all attributes off
    res << change_attribute(current_attr, 0) if current_attr != 0

    res
  end

end
|
|
|