ruby--ruby/lib/rdoc/markup/simple_markup/inline.rb

341 lines
8.5 KiB
Ruby
Raw Normal View History

module SM
# We manage a set of attributes. Each attribute has a symbol name
# and a bit value
class Attribute
SPECIAL = 1
@@name_to_bitmap = { :_SPECIAL_ => SPECIAL }
@@next_bitmap = 2
def Attribute.bitmap_for(name)
bitmap = @@name_to_bitmap[name]
if !bitmap
bitmap = @@next_bitmap
@@next_bitmap <<= 1
@@name_to_bitmap[name] = bitmap
end
bitmap
end
def Attribute.as_string(bitmap)
return "none" if bitmap.zero?
res = []
@@name_to_bitmap.each do |name, bit|
res << name if (bitmap & bit) != 0
end
res.join(",")
end
def Attribute.each_name_of(bitmap)
@@name_to_bitmap.each do |name, bit|
next if bit == SPECIAL
yield name.to_s if (bitmap & bit) != 0
end
end
end
# An AttrChanger records a change in attributes. It contains
# a bitmap of the attributes to turn on, and a bitmap of those to
# turn off
AttrChanger = Struct.new(:turn_on, :turn_off)
class AttrChanger
def to_s
"Attr: +#{Attribute.as_string(@turn_on)}/-#{Attribute.as_string(@turn_on)}"
end
end
# An array of attributes which parallels the characters in a string
class AttrSpan
def initialize(length)
@attrs = Array.new(length, 0)
end
def set_attrs(start, length, bits)
for i in start ... (start+length)
@attrs[i] |= bits
end
end
def [](n)
@attrs[n]
end
end
##
# Hold details of a special sequence
class Special
attr_reader :type
attr_accessor :text
def initialize(type, text)
@type, @text = type, text
end
def ==(o)
self.text == o.text && self.type == o.type
end
def to_s
"Special: type=#{type}, text=#{text.dump}"
end
end
class AttributeManager
NULL = "\000".freeze
##
# We work by substituting non-printing characters in to the
# text. For now I'm assuming that I can substitute
# a character in the range 0..8 for a 7 bit character
# without damaging the encoded string, but this might
# be optimistic
#
A_PROTECT = 004
PROTECT_ATTR = A_PROTECT.chr
# This maps delimiters that occur around words (such as
# *bold* or +tt+) where the start and end delimiters
# and the same. This lets us optimize the regexp
MATCHING_WORD_PAIRS = {}
# And this is used when the delimiters aren't the same. In this
# case the hash maps a pattern to the attribute character
WORD_PAIR_MAP = {}
# This maps HTML tags to the corresponding attribute char
HTML_TAGS = {}
# And this maps _special_ sequences to a name. A special sequence
# is something like a WikiWord
SPECIAL = {}
# Return an attribute object with the given turn_on
# and turn_off bits set
def attribute(turn_on, turn_off)
AttrChanger.new(turn_on, turn_off)
end
def change_attribute(current, new)
diff = current ^ new
attribute(new & diff, current & diff)
end
def changed_attribute_by_name(current_set, new_set)
current = new = 0
current_set.each {|name| current |= Attribute.bitmap_for(name) }
new_set.each {|name| new |= Attribute.bitmap_for(name) }
change_attribute(current, new)
end
def copy_string(start_pos, end_pos)
res = @str[start_pos...end_pos]
res.gsub!(/\000/, '')
res
end
# Map attributes like <b>text</b>to the sequence \001\002<char>\001\003<char>,
# where <char> is a per-attribute specific character
def convert_attrs(str, attrs)
# first do matching ones
tags = MATCHING_WORD_PAIRS.keys.join("")
re = "(^|\\W)([#{tags}])([A-Za-z_]+?)\\2(\\W|\$)"
# re = "(^|\\W)([#{tags}])(\\S+?)\\2(\\W|\$)"
1 while str.gsub!(Regexp.new(re)) {
attr = MATCHING_WORD_PAIRS[$2];
attrs.set_attrs($`.length + $1.length + $2.length, $3.length, attr)
$1 + NULL*$2.length + $3 + NULL*$2.length + $4
}
# then non-matching
unless WORD_PAIR_MAP.empty?
WORD_PAIR_MAP.each do |regexp, attr|
str.gsub!(regexp) {
attrs.set_attrs($`.length + $1.length, $2.length, attr)
NULL*$1.length + $2 + NULL*$3.length
}
end
end
end
def convert_html(str, attrs)
tags = HTML_TAGS.keys.join("|")
re = "<(#{tags})>(.*?)</\\1>"
1 while str.gsub!(Regexp.new(re, Regexp::IGNORECASE)) {
attr = HTML_TAGS[$1.downcase]
html_length = $1.length + 2
seq = NULL * html_length
attrs.set_attrs($`.length + html_length, $2.length, attr)
seq + $2 + seq + NULL
}
end
def convert_specials(str, attrs)
unless SPECIAL.empty?
SPECIAL.each do |regexp, attr|
str.scan(regexp) do
attrs.set_attrs($`.length, $&.length, attr | Attribute::SPECIAL)
end
end
end
end
# A \ in front of a character that would normally be
# processed turns off processing. We do this by turning
# \< into <#{PROTECT}
PROTECTABLE = [ "<" << "\\" ] #"
def mask_protected_sequences
protect_pattern = Regexp.new("\\\\([#{Regexp.escape(PROTECTABLE.join(''))}])")
@str.gsub!(protect_pattern, "\\1#{PROTECT_ATTR}")
end
def unmask_protected_sequences
@str.gsub!(/(.)#{PROTECT_ATTR}/, "\\1\000")
end
def initialize
add_word_pair("*", "*", :BOLD)
add_word_pair("_", "_", :EM)
add_word_pair("+", "+", :TT)
add_html("em", :EM)
add_html("i", :EM)
add_html("b", :BOLD)
add_html("tt", :TT)
add_html("code", :TT)
add_special(/<!--(.*?)-->/, :COMMENT)
end
def add_word_pair(start, stop, name)
raise "Word flags may not start '<'" if start[0] == ?<
bitmap = Attribute.bitmap_for(name)
if start == stop
MATCHING_WORD_PAIRS[start] = bitmap
else
pattern = Regexp.new("(" + Regexp.escape(start) + ")" +
# "([A-Za-z]+)" +
"(\\S+)" +
"(" + Regexp.escape(stop) +")")
WORD_PAIR_MAP[pattern] = bitmap
end
PROTECTABLE << start[0,1]
PROTECTABLE.uniq!
end
def add_html(tag, name)
HTML_TAGS[tag.downcase] = Attribute.bitmap_for(name)
end
def add_special(pattern, name)
SPECIAL[pattern] = Attribute.bitmap_for(name)
end
def flow(str)
@str = str
puts("Before flow, str='#{@str.dump}'") if $DEBUG
mask_protected_sequences
@attrs = AttrSpan.new(@str.length)
puts("After protecting, str='#{@str.dump}'") if $DEBUG
convert_attrs(@str, @attrs)
convert_html(@str, @attrs)
convert_specials(str, @attrs)
unmask_protected_sequences
puts("After flow, str='#{@str.dump}'") if $DEBUG
return split_into_flow
end
def display_attributes
puts
puts @str.tr(NULL, "!")
bit = 1
16.times do |bno|
line = ""
@str.length.times do |i|
if (@attrs[i] & bit) == 0
line << " "
else
if bno.zero?
line << "S"
else
line << ("%d" % (bno+1))
end
end
end
puts(line) unless line =~ /^ *$/
bit <<= 1
end
end
def split_into_flow
display_attributes if $DEBUG
res = []
current_attr = 0
str = ""
str_len = @str.length
# skip leading invisible text
i = 0
i += 1 while i < str_len and @str[i] == "\0"
start_pos = i
# then scan the string, chunking it on attribute changes
while i < str_len
new_attr = @attrs[i]
if new_attr != current_attr
if i > start_pos
res << copy_string(start_pos, i)
start_pos = i
end
res << change_attribute(current_attr, new_attr)
current_attr = new_attr
if (current_attr & Attribute::SPECIAL) != 0
i += 1 while i < str_len and (@attrs[i] & Attribute::SPECIAL) != 0
res << Special.new(current_attr, copy_string(start_pos, i))
start_pos = i
next
end
end
# move on, skipping any invisible characters
begin
i += 1
end while i < str_len and @str[i] == "\0"
end
# tidy up trailing text
if start_pos < str_len
res << copy_string(start_pos, str_len)
end
# and reset to all attributes off
res << change_attribute(current_attr, 0) if current_attr != 0
return res
end
end
end