require 'rdoc/markup'

class RDoc::Markup

  ##
  # We manage a set of attributes.  Each attribute has a symbol name and a
  # bit value.  Bit 1 is reserved for the :_SPECIAL_ marker; every other
  # name is handed the next free power of two the first time it is seen.

  class Attribute

    SPECIAL = 1

    @@name_to_bitmap = { :_SPECIAL_ => SPECIAL }
    @@next_bitmap = 2

    ##
    # Returns the bit assigned to +name+, allocating a fresh bit if +name+
    # has not been registered yet.

    def self.bitmap_for(name)
      bitmap = @@name_to_bitmap[name]
      unless bitmap
        bitmap = @@next_bitmap
        @@next_bitmap <<= 1
        @@name_to_bitmap[name] = bitmap
      end
      bitmap
    end

    ##
    # Returns a comma-separated list of the attribute names whose bits are
    # set in +bitmap+, or "none" when +bitmap+ is zero.

    def self.as_string(bitmap)
      return "none" if bitmap.zero?

      res = []
      @@name_to_bitmap.each do |name, bit|
        res << name if (bitmap & bit) != 0
      end
      res.join(",")
    end

    ##
    # Yields the name (as a String) of every attribute set in +bitmap+,
    # skipping the :_SPECIAL_ marker bit.

    def self.each_name_of(bitmap)
      @@name_to_bitmap.each do |name, bit|
        next if bit == SPECIAL
        yield name.to_s if (bitmap & bit) != 0
      end
    end

  end

  ##
  # An AttrChanger records a change in attributes.  It contains a bitmap of
  # the attributes to turn on, and a bitmap of those to turn off.

  AttrChanger = Struct.new(:turn_on, :turn_off)

  class AttrChanger

    ##
    # Human-readable form, e.g. "Attr: +BOLD/-EM".  Struct members are read
    # through their accessors; they are not stored in instance variables.

    def to_s
      "Attr: +#{Attribute.as_string(turn_on)}/-#{Attribute.as_string(turn_off)}"
    end

  end

  ##
  # An array of attributes which parallels the characters in a string.

  class AttrSpan

    ##
    # Creates a span of +length+ entries, all with no attribute bits set.

    def initialize(length)
      @attrs = Array.new(length, 0)
    end

    ##
    # ORs +bits+ into the +length+ entries beginning at index +start+.

    def set_attrs(start, length, bits)
      (start...(start + length)).each do |i|
        @attrs[i] |= bits
      end
    end

    ##
    # Returns the attribute bitmap stored at index +n+.

    def [](n)
      @attrs[n]
    end

  end

  ##
  # Hold details of a special sequence

  class Special

    attr_reader   :type
    attr_accessor :text

    def initialize(type, text)
      @type, @text = type, text
    end

    ##
    # Two specials are equal when both their text and type match.  Objects
    # that do not respond to +text+ and +type+ (for example the plain Strings
    # that sit next to Specials in a flow) compare as not equal instead of
    # raising NoMethodError.

    def ==(o)
      return false unless o.respond_to?(:text) && o.respond_to?(:type)

      text == o.text && type == o.type
    end

    def inspect
      "#<RDoc::Markup::Special:0x%x @type=%p, name:%p @text=%p>" % [
        object_id, @type, RDoc::Markup::Attribute.as_string(type), text.dump]
    end

    def to_s
      "Special: type=#{type}, name=#{RDoc::Markup::Attribute.as_string type}, text=#{text.dump}"
    end

  end

  class AttributeManager

    NULL = "\000".freeze

    ##
    # We work by substituting non-printing characters in to the text.  For
    # now I'm assuming that I can substitute a character in the range 0..8
    # for a 7 bit character without damaging the encoded string, but this
    # might be optimistic.

    A_PROTECT    = 004
    PROTECT_ATTR = A_PROTECT.chr

    ##
    # This maps delimiters that occur around words (such as *bold* or +tt+)
    # where the start and end delimiters are the same.  This lets us
    # optimize the regexp.

    MATCHING_WORD_PAIRS = {}

    ##
    # And this is used when the delimiters aren't the same.  In this case
    # the hash maps a pattern to the attribute bitmap.

    WORD_PAIR_MAP = {}

    ##
    # This maps HTML tags to the corresponding attribute bitmap.

    HTML_TAGS = {}

    ##
    # And this maps _special_ sequences to a name.  A special sequence is
    # something like a WikiWord.

    SPECIAL = {}

    ##
    # Return an attribute object with the given turn_on and turn_off bits
    # set.

    def attribute(turn_on, turn_off)
      AttrChanger.new(turn_on, turn_off)
    end

    ##
    # Returns the AttrChanger that moves from bitmap +current+ to bitmap
    # +new+: bits only in +new+ are turned on, bits only in +current+ are
    # turned off.

    def change_attribute(current, new)
      diff = current ^ new
      attribute(new & diff, current & diff)
    end

    ##
    # Same as #change_attribute, but both sides are given as collections of
    # attribute names rather than bitmaps.

    def changed_attribute_by_name(current_set, new_set)
      current = new = 0
      current_set.each { |name| current |= Attribute.bitmap_for(name) }
      new_set.each { |name| new |= Attribute.bitmap_for(name) }

      change_attribute(current, new)
    end

    ##
    # Returns the characters of the working string in
    # <tt>start_pos...end_pos</tt> with all NUL placeholders removed.

    def copy_string(start_pos, end_pos)
      res = @str[start_pos...end_pos]
      res.gsub!(/\000/, '')
      res
    end

    ##
    # Handles word-pair markup such as *bold* or +tt+.  The delimiters are
    # overwritten in place with NUL characters (so the string keeps its
    # length) and the attribute bits for the enclosed text are recorded in
    # +attrs+.

    def convert_attrs(str, attrs)
      # first do matching ones; delimiters are escaped so that characters
      # such as "-", "^" or "]" cannot corrupt the character class
      tags = Regexp.escape(MATCHING_WORD_PAIRS.keys.join(""))
      matching_re = Regexp.new("(^|\\W)([#{tags}])([A-Za-z_]+?)\\2(\\W|\$)")

      # a single pass can miss pairs that share a boundary character, so
      # repeat until nothing more is replaced
      1 while str.gsub!(matching_re) {
        attr = MATCHING_WORD_PAIRS[$2]
        attrs.set_attrs($`.length + $1.length + $2.length, $3.length, attr)
        $1 + NULL * $2.length + $3 + NULL * $2.length + $4
      }

      # then non-matching
      WORD_PAIR_MAP.each do |regexp, attr|
        str.gsub!(regexp) {
          attrs.set_attrs($`.length + $1.length, $2.length, attr)
          NULL * $1.length + $2 + NULL * $3.length
        }
      end
    end

    ##
    # Handles registered HTML-style tags.  Opening and closing tags are
    # overwritten with NUL characters and the attribute bits for the
    # enclosed text are recorded in +attrs+.

    def convert_html(str, attrs)
      tags = HTML_TAGS.keys.join '|'
      tag_re = /<(#{tags})>(.*?)<\/\1>/i

      1 while str.gsub!(tag_re) {
        attr = HTML_TAGS[$1.downcase]
        html_length = $1.length + 2 # "<" + tag name + ">"
        seq = NULL * html_length
        attrs.set_attrs($`.length + html_length, $2.length, attr)
        # the closing tag is one character longer than the opening tag
        # (the "/"), hence the extra NULL
        seq + $2 + seq + NULL
      }
    end

    ##
    # Flags every match of each registered special pattern in +attrs+ with
    # the pattern's bit plus Attribute::SPECIAL.  +str+ itself is not
    # modified.

    def convert_specials(str, attrs)
      SPECIAL.each do |regexp, attr|
        str.scan(regexp) do
          attrs.set_attrs($`.length, $&.length, attr | Attribute::SPECIAL)
        end
      end
    end

    ##
    # A \ in front of a character that would normally be processed turns off
    # processing.  We do this by turning \< into <#{PROTECT}

    PROTECTABLE = ["<", "\\"]

    ##
    # Replaces each backslash-escaped protectable character with the
    # character followed by PROTECT_ATTR.

    def mask_protected_sequences
      protect_pattern = Regexp.new("\\\\([#{Regexp.escape(PROTECTABLE.join(''))}])")
      @str.gsub!(protect_pattern, "\\1#{PROTECT_ATTR}")
    end

    ##
    # Turns the PROTECT_ATTR marker that follows a protected character into
    # a NUL placeholder.

    def unmask_protected_sequences
      @str.gsub!(/(.)#{PROTECT_ATTR}/, "\\1\000")
    end

    def initialize
      add_word_pair("*", "*", :BOLD)
      add_word_pair("_", "_", :EM)
      add_word_pair("+", "+", :TT)

      add_html("em", :EM)
      add_html("i",  :EM)
      add_html("b",  :BOLD)
      add_html("tt",   :TT)
      add_html("code", :TT)

      # NOTE(review): the pattern below is empty, so it only ever produces
      # zero-length matches and can never mark any text as a :COMMENT
      # special.  It looks truncated -- confirm the intended pattern.
      add_special(//, :COMMENT)
    end

    ##
    # Registers +start+ and +stop+ as delimiters for the attribute +name+.
    # The first character of +start+ also becomes protectable with a
    # backslash.
    #
    # Raises RuntimeError if +start+ begins with "<".

    def add_word_pair(start, stop, name)
      raise "Word flags may not start '<'" if start[0] == ?<

      bitmap = Attribute.bitmap_for(name)

      if start == stop
        MATCHING_WORD_PAIRS[start] = bitmap
      else
        pattern = Regexp.new("(" + Regexp.escape(start) + ")" +
                             "(\\S+)" +
                             "(" + Regexp.escape(stop) + ")")
        WORD_PAIR_MAP[pattern] = bitmap
      end

      PROTECTABLE << start[0, 1]
      PROTECTABLE.uniq!
    end

    ##
    # Registers the HTML-style tag +tag+ (case-insensitive) for the
    # attribute +name+.

    def add_html(tag, name)
      HTML_TAGS[tag.downcase] = Attribute.bitmap_for(name)
    end

    ##
    # Registers +pattern+ as a special sequence for the attribute +name+.

    def add_special(pattern, name)
      SPECIAL[pattern] = Attribute.bitmap_for(name)
    end

    ##
    # Processes the markup in +str+ and returns it as a flow: an Array of
    # Strings, AttrChangers and Specials.
    #
    # +str+ is modified in place while it is being processed.

    def flow(str)
      @str = str

      puts("Before flow, str='#{@str.dump}'") if $DEBUG_RDOC
      mask_protected_sequences

      @attrs = AttrSpan.new(@str.length)

      puts("After protecting, str='#{@str.dump}'") if $DEBUG_RDOC

      convert_attrs(@str, @attrs)
      convert_html(@str, @attrs)
      convert_specials(@str, @attrs)

      unmask_protected_sequences

      puts("After flow, str='#{@str.dump}'") if $DEBUG_RDOC

      split_into_flow
    end

    ##
    # Debugging aid: prints the working string (NULs shown as "!") followed
    # by one line per attribute bit in use, marking the characters that
    # carry that bit.

    def display_attributes
      puts
      puts @str.tr(NULL, "!")
      bit = 1
      16.times do |bno|
        line = ""
        @str.length.times do |i|
          if (@attrs[i] & bit) == 0
            line << " "
          elsif bno.zero?
            line << "S"
          else
            line << ("%d" % (bno + 1))
          end
        end
        puts(line) unless line =~ /^ *$/
        bit <<= 1
      end
    end

    ##
    # Walks the working string and its attribute span in parallel and
    # returns the flow Array: runs of text, an AttrChanger wherever the
    # attribute bitmap changes, and a Special for each run flagged with
    # Attribute::SPECIAL.

    def split_into_flow
      display_attributes if $DEBUG_RDOC

      res = []
      current_attr = 0
      str_len = @str.length

      # skip leading invisible text
      i = 0
      i += 1 while i < str_len && @str[i] == "\0"
      start_pos = i

      # then scan the string, chunking it on attribute changes
      while i < str_len
        new_attr = @attrs[i]
        if new_attr != current_attr
          if i > start_pos
            res << copy_string(start_pos, i)
            start_pos = i
          end

          res << change_attribute(current_attr, new_attr)
          current_attr = new_attr

          if (current_attr & Attribute::SPECIAL) != 0
            # swallow the whole special run as one Special object
            i += 1 while i < str_len && (@attrs[i] & Attribute::SPECIAL) != 0
            res << Special.new(current_attr, copy_string(start_pos, i))
            start_pos = i
            next
          end
        end

        # move on, skipping any invisible characters
        i += 1
        i += 1 while i < str_len && @str[i] == "\0"
      end

      # tidy up trailing text
      res << copy_string(start_pos, str_len) if start_pos < str_len

      # and reset to all attributes off
      res << change_attribute(current_attr, 0) if current_attr != 0

      res
    end

  end

end