2008-01-13 22:34:05 -05:00
|
|
|
require 'rdoc/markup'
|
|
|
|
|
|
|
|
class RDoc::Markup
|
2003-12-01 02:12:49 -05:00
|
|
|
|
2008-01-12 22:35:34 -05:00
|
|
|
##
# We manage a set of attributes. Each attribute has a symbol name and a bit
# value.
#
# Names are mapped to bits lazily: the first time a name is passed to
# Attribute.bitmap_for it is given the next unused bit. The lowest bit
# (SPECIAL) is reserved and pre-registered under the name +:_SPECIAL_+.

class Attribute

  ##
  # Bit reserved for the +:_SPECIAL_+ attribute.

  SPECIAL = 1

  # Registry of attribute name => single-bit bitmap.
  @@name_to_bitmap = { :_SPECIAL_ => SPECIAL }

  # The bit that will be handed to the next previously unseen name.
  @@next_bitmap = 2

  ##
  # Returns the bit assigned to +name+, assigning the next free bit first if
  # +name+ has not been seen before. Repeated calls with the same name return
  # the same value.

  def self.bitmap_for(name)
    bitmap = @@name_to_bitmap[name]

    unless bitmap
      bitmap = @@next_bitmap
      @@next_bitmap <<= 1
      @@name_to_bitmap[name] = bitmap
    end

    bitmap
  end

  ##
  # Returns a comma separated list of the names of every registered attribute
  # whose bit is set in +bitmap+, or "none" when +bitmap+ is zero. Bits that
  # have no registered name contribute nothing to the result.

  def self.as_string(bitmap)
    return "none" if bitmap.zero?

    names = @@name_to_bitmap.keys.select do |name|
      (bitmap & @@name_to_bitmap[name]) != 0
    end

    names.join(",")
  end

  ##
  # Yields the name (as a String) of every registered attribute whose bit is
  # set in +bitmap+. The SPECIAL bit is never yielded.
  #
  # When called without a block an Enumerator over the same names is returned
  # instead of raising LocalJumpError.

  def self.each_name_of(bitmap)
    return enum_for(:each_name_of, bitmap) unless block_given?

    @@name_to_bitmap.each do |name, bit|
      next if bit == SPECIAL
      yield name.to_s if (bitmap & bit) != 0
    end
  end

end
|
|
|
|
|
2008-01-12 22:35:34 -05:00
|
|
|
##
# An AttrChanger records a change in attributes. It contains a bitmap of the
# attributes to turn on, and a bitmap of those to turn off.

AttrChanger = Struct.new(:turn_on, :turn_off)

class AttrChanger

  ##
  # Human readable form: the names of the attributes being turned on and of
  # those being turned off, as rendered by Attribute.as_string.
  #
  # The Struct accessors are used here because Struct members are not
  # instance variables; reading @turn_on would always give nil.

  def to_s
    "Attr: +#{Attribute.as_string(turn_on)}/-#{Attribute.as_string(turn_off)}"
  end

end
|
|
|
|
|
2008-01-12 22:35:34 -05:00
|
|
|
##
# An array of attributes which parallels the characters in a string.

class AttrSpan

  ##
  # Creates a span of +length+ entries, each starting with no attribute bits
  # set (0).

  def initialize(length)
    @attrs = Array.new(length, 0)
  end

  ##
  # ORs +bits+ into the +length+ entries beginning at index +start+.
  #
  # An index beyond the current end of the span is treated as having no bits
  # set. Without that, <tt>nil | bits</tt> would evaluate to +true+ and a
  # boolean would be stored in place of a bitmap.

  def set_attrs(start, length, bits)
    (start...(start + length)).each do |i|
      @attrs[i] = (@attrs[i] || 0) | bits
    end
  end

  ##
  # Returns the attribute bitmap stored at index +n+.

  def [](n)
    @attrs[n]
  end

end
|
|
|
|
|
|
|
|
##
# Hold details of a special sequence

class Special

  ##
  # The attribute bitmap this special was created with.

  attr_reader :type

  ##
  # The text of the special sequence.

  attr_accessor :text

  ##
  # Creates a special holding the attribute bitmap +type+ and the matched
  # +text+.

  def initialize(type, text)
    @type, @text = type, text
  end

  ##
  # Two specials are equal when both their text and their type are equal.
  # Objects that do not expose +text+ and +type+ (a String or nil, say) are
  # simply unequal rather than raising NoMethodError.

  def ==(o)
    return false unless o.respond_to?(:text) && o.respond_to?(:type)

    text == o.text && type == o.type
  end

  ##
  # Debugging representation showing the object id, the raw type, the
  # attribute names for that type and the dumped text.

  def inspect
    "#<RDoc::Markup::Special:0x%x @type=%p, name=%p @text=%p>" % [
      object_id, @type, RDoc::Markup::Attribute.as_string(type), text.dump]
  end

  ##
  # Short description showing the type, its attribute names and the dumped
  # text.

  def to_s
    "Special: type=#{type}, name=#{RDoc::Markup::Attribute.as_string type}, text=#{text.dump}"
  end

end
|
2008-01-12 22:35:34 -05:00
|
|
|
|
2003-12-01 02:12:49 -05:00
|
|
|
##
# Finds inline markup in a string (word delimiters such as *bold*, HTML-like
# tags such as <b>bold</b>, and registered special sequences) and breaks the
# string into a flat list of text fragments, AttrChanger objects and Special
# objects.
#
# The lookup tables below are constants, so every instance shares (and adds
# to) the same tables.

class AttributeManager

  ##
  # Character substituted for markup characters that have been consumed. It
  # is stripped again when text is copied out of the working string.

  NULL = "\000".freeze

  ##
  # We work by substituting non-printing characters in to the text. For now
  # I'm assuming that I can substitute a character in the range 0..8 for a 7
  # bit character without damaging the encoded string, but this might be
  # optimistic

  A_PROTECT = 004
  PROTECT_ATTR = A_PROTECT.chr

  ##
  # This maps delimiters that occur around words (such as *bold* or +tt+)
  # where the start and end delimiters are the same. This lets us optimize
  # the regexp

  MATCHING_WORD_PAIRS = {}

  ##
  # And this is used when the delimiters aren't the same. In this case the
  # hash maps a pattern to the attribute bitmap

  WORD_PAIR_MAP = {}

  ##
  # This maps HTML tags to the corresponding attribute bitmap

  HTML_TAGS = {}

  ##
  # And this maps _special_ sequences to an attribute bitmap. A special
  # sequence is something like a WikiWord

  SPECIAL = {}

  ##
  # A \ in front of a character that would normally be processed turns off
  # processing. We do this by turning \< into <#{PROTECT_ATTR}.
  #
  # This is the list of characters that may be protected that way;
  # add_word_pair appends the first character of every start delimiter.

  PROTECTABLE = %w[<\\]

  ##
  # Registers the default markup: *bold*, _em_ and +tt+ word pairs, the em,
  # i, b, tt and code tags, and HTML comments as a special sequence.

  def initialize
    add_word_pair("*", "*", :BOLD)
    add_word_pair("_", "_", :EM)
    add_word_pair("+", "+", :TT)

    add_html("em", :EM)
    add_html("i", :EM)
    add_html("b", :BOLD)
    add_html("tt", :TT)
    add_html("code", :TT)

    add_special(/<!--(.*?)-->/, :COMMENT)
  end

  ##
  # Return an attribute object with the given turn_on and turn_off bits set

  def attribute(turn_on, turn_off)
    AttrChanger.new(turn_on, turn_off)
  end

  ##
  # Returns the AttrChanger that moves from the bitmap +current+ to the
  # bitmap +new+: bits only in +new+ are turned on, bits only in +current+
  # are turned off.

  def change_attribute(current, new)
    diff = current ^ new
    attribute(new & diff, current & diff)
  end

  ##
  # Like change_attribute, but +current_set+ and +new_set+ are collections of
  # attribute names which are first converted to bitmaps.

  def changed_attribute_by_name(current_set, new_set)
    current = current_set.inject(0) { |bits, name| bits | Attribute.bitmap_for(name) }
    new     = new_set.inject(0)     { |bits, name| bits | Attribute.bitmap_for(name) }

    change_attribute(current, new)
  end

  ##
  # Returns the part of the working string from +start_pos+ up to (but not
  # including) +end_pos+ with all NULL placeholders removed.

  def copy_string(start_pos, end_pos)
    @str[start_pos...end_pos].delete(NULL)
  end

  ##
  # Handles word-pair markup in +str+. For every match the delimiters are
  # overwritten in place with NULL characters (so the string keeps its
  # length) and the attribute's bits are recorded in +attrs+ for the enclosed
  # text.

  def convert_attrs(str, attrs)
    # first do matching ones
    #
    # The delimiters are combined as an escaped alternation rather than being
    # pasted into a character class: inside [...] a delimiter such as "-",
    # "^" or "]" changes the meaning of the class, and a delimiter longer
    # than one character could never match. Longest delimiters come first so
    # they win over their own prefixes.
    delimiters = MATCHING_WORD_PAIRS.keys.sort_by { |key| -key.length }
    tags = Regexp.union(delimiters).source
    re = /(^|\W)(#{tags})([#\\]?[\w.\/]+?\S?)\2(\W|$)/

    # Repeat until nothing changes: the character that ends one match may be
    # the character that has to precede the next one.
    loop do
      changed = str.gsub!(re) do
        attr = MATCHING_WORD_PAIRS[$2]
        attrs.set_attrs($`.length + $1.length + $2.length, $3.length, attr)
        $1 + NULL * $2.length + $3 + NULL * $2.length + $4
      end

      break unless changed
    end

    # then non-matching
    WORD_PAIR_MAP.each do |regexp, attr|
      str.gsub!(regexp) do
        attrs.set_attrs($`.length + $1.length, $2.length, attr)
        NULL * $1.length + $2 + NULL * $3.length
      end
    end
  end

  ##
  # Handles <tag>text</tag> markup in +str+ for every registered tag (matched
  # case-insensitively). The tags are overwritten in place with NULL
  # characters and the attribute's bits are recorded in +attrs+ for the
  # enclosed text.

  def convert_html(str, attrs)
    # Regexp.union escapes the tag names and, for an empty table, yields a
    # pattern that never matches. Only its source is interpolated so that
    # the /i flag below still applies to the tag names.
    tags = Regexp.union(HTML_TAGS.keys).source

    loop do
      changed = str.gsub!(/<(#{tags})>(.*?)<\/\1>/i) do
        attr = HTML_TAGS[$1.downcase]
        html_length = $1.length + 2       # "<" + tag + ">"
        seq = NULL * html_length
        attrs.set_attrs($`.length + html_length, $2.length, attr)
        seq + $2 + seq + NULL             # closing tag is one longer ("/")
      end

      break unless changed
    end
  end

  ##
  # Records, for every match of every registered special pattern in +str+,
  # the pattern's bits together with Attribute::SPECIAL in +attrs+. The
  # string itself is left untouched.

  def convert_specials(str, attrs)
    SPECIAL.each do |regexp, attr|
      str.scan(regexp) do
        attrs.set_attrs($`.length, $&.length, attr | Attribute::SPECIAL)
      end
    end
  end

  ##
  # Replaces a backslash followed by a protectable character with that
  # character followed by PROTECT_ATTR, so the converters do not see it as
  # the start of markup.

  def mask_protected_sequences
    protect_pattern = Regexp.new("\\\\([#{Regexp.escape(PROTECTABLE.join(''))}])")
    @str.gsub!(protect_pattern, "\\1#{PROTECT_ATTR}")
  end

  ##
  # Replaces every PROTECT_ATTR marker left by mask_protected_sequences with
  # a NULL placeholder.

  def unmask_protected_sequences
    @str.gsub!(/(.)#{PROTECT_ATTR}/, "\\1\000")
  end

  ##
  # Registers +start+ and +stop+ as delimiters that give the enclosed word
  # the attribute +name+. Raises if +start+ begins with "<". The first
  # character of +start+ becomes protectable with a backslash.

  def add_word_pair(start, stop, name)
    raise "Word flags may not start '<'" if start[0, 1] == '<'

    bitmap = Attribute.bitmap_for(name)

    if start == stop
      MATCHING_WORD_PAIRS[start] = bitmap
    else
      pattern = /(#{Regexp.escape(start)})(\S+)(#{Regexp.escape(stop)})/
      WORD_PAIR_MAP[pattern] = bitmap
    end

    PROTECTABLE << start[0, 1]
    PROTECTABLE.uniq!
  end

  ##
  # Registers the tag +tag+ (stored lower-cased) as giving the enclosed text
  # the attribute +name+.

  def add_html(tag, name)
    HTML_TAGS[tag.downcase] = Attribute.bitmap_for(name)
  end

  ##
  # Registers +pattern+ as a special sequence with the attribute +name+.

  def add_special(pattern, name)
    SPECIAL[pattern] = Attribute.bitmap_for(name)
  end

  ##
  # Processes the markup in +str+ and returns the resulting flow (see
  # split_into_flow). +str+ is modified in place while this happens. Prints
  # intermediate states when $DEBUG_RDOC is set.

  def flow(str)
    @str = str

    puts("Before flow, str='#{@str.dump}'") if $DEBUG_RDOC

    mask_protected_sequences

    @attrs = AttrSpan.new(@str.length)

    puts("After protecting, str='#{@str.dump}'") if $DEBUG_RDOC

    convert_attrs(@str, @attrs)
    convert_html(@str, @attrs)
    convert_specials(str, @attrs)

    unmask_protected_sequences

    puts("After flow, str='#{@str.dump}'") if $DEBUG_RDOC

    split_into_flow
  end

  ##
  # Debugging aid: prints the working string (NULLs shown as "!") followed by
  # one line per attribute bit, for the lowest 16 bits, marking the
  # characters that have the bit set. Bit 0 is marked "S"; the others are
  # marked with their 1-based bit number. Lines with no marks are omitted.

  def display_attributes
    puts
    puts @str.tr(NULL, "!")

    16.times do |bno|
      bit = 1 << bno
      marker = bno.zero? ? "S" : (bno + 1).to_s

      line = (0...@str.length).map do |i|
        (@attrs[i] & bit) == 0 ? " " : marker
      end.join

      puts(line) unless line =~ /^ *$/
    end
  end

  ##
  # Walks the working string and its attribute span and returns an array of
  # text Strings, AttrChanger objects (emitted wherever the attributes
  # change) and Special objects (one per run of characters carrying the
  # SPECIAL bit). A final AttrChanger turns everything off if attributes are
  # still active at the end.

  def split_into_flow
    display_attributes if $DEBUG_RDOC

    res = []
    current_attr = 0
    str_len = @str.length

    # skip leading invisible text
    i = 0
    i += 1 while i < str_len && @str[i, 1] == NULL
    start_pos = i

    # then scan the string, chunking it on attribute changes
    while i < str_len
      new_attr = @attrs[i]

      if new_attr != current_attr
        if i > start_pos
          res << copy_string(start_pos, i)
          start_pos = i
        end

        res << change_attribute(current_attr, new_attr)
        current_attr = new_attr

        if (current_attr & Attribute::SPECIAL) != 0
          # swallow the whole special run and emit it as a single object
          i += 1 while i < str_len && (@attrs[i] & Attribute::SPECIAL) != 0
          res << Special.new(current_attr, copy_string(start_pos, i))
          start_pos = i
          next
        end
      end

      # move on, skipping any invisible characters
      i += 1
      i += 1 while i < str_len && @str[i, 1] == NULL
    end

    # tidy up trailing text
    res << copy_string(start_pos, str_len) if start_pos < str_len

    # and reset to all attributes off
    res << change_attribute(current_attr, 0) if current_attr != 0

    res
  end

end
|
|
|
|
|
|
|
|
end
|
2008-01-12 22:35:34 -05:00
|
|
|
|