Cross-ported the REXML changes from HEAD to the 1.8 branch.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@8486 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2005-05-19 03:51:53 +00:00 · 2005-05-19 03:51:53 +00:00 · ed512acb2f
commit ed512acb2f
parent d4d497dd86
23 changed files with 1332 additions and 1036 deletions
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@ -5,180 +5,182 @@ require 'rexml/doctype'
 require 'rexml/parseexception'

 module REXML
-	# Represents text nodes in an XML document
-	class Text < Child
-		include Comparable
-		# The order in which the substitutions occur
-		SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
-		SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
-		# Characters which are substituted in written strings
-		SLAICEPS = [ '<', '>', '"', "'", '&' ]
-		SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
+  # Represents text nodes in an XML document
+  class Text < Child
+    include Comparable
+    # The order in which the substitutions occur
+    SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
+    SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
+    # Characters which are substituted in written strings
+    SLAICEPS = [ '<', '>', '"', "'", '&' ]
+    SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]

-		# If +raw+ is true, then REXML leaves the value alone
-		attr_accessor :raw
+    # If +raw+ is true, then REXML leaves the value alone
+    attr_accessor :raw

-		ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
-		NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ 
+    ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
+    NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ 

-		# Constructor
-		# +arg+ if a String, the content is set to the String.  If a Text,
-		# the object is shallowly cloned.  
-		#
-		# +respect_whitespace+ (boolean, false) if true, whitespace is
-		# respected
-		#
-		# +parent+ (nil) if this is a Parent object, the parent
-		# will be set to this.  
-		#
-		# +raw+ (nil) This argument can be given three values.
-		# If true, then the value of used to construct this object is expected to 
-		# contain no unescaped XML markup, and REXML will not change the text. If 
-		# this value is false, the string may contain any characters, and REXML will
-		# escape any and all defined entities whose values are contained in the
-		# text.  If this value is nil (the default), then the raw value of the 
-		# parent will be used as the raw value for this node.  If there is no raw
-		# value for the parent, and no value is supplied, the default is false.
-		#   Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
-		#   Text.new( "<&", false, nil, true )  #-> IllegalArgumentException
-		#   Text.new( "&lt;&amp;", false, nil, true )  #-> "&lt;&amp;"
-		#   # Assume that the entity "s" is defined to be "sean"
-		#   # and that the entity    "r" is defined to be "russell"
-		#   Text.new( "sean russell" )          #-> "&s; &r;"
-		#   Text.new( "sean russell", false, nil, true ) #-> "sean russell"
-		#
-		# +entity_filter+ (nil) This can be an array of entities to match in the
-		# supplied text.  This argument is only useful if +raw+ is set to false.
-		#   Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
-		#   Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
-		# In the last example, the +entity_filter+ argument is ignored.
-		#
-		# +pattern+ INTERNAL USE ONLY
-		def initialize(arg, respect_whitespace=false, parent=nil, raw=nil, 
-			entity_filter=nil, illegal=ILLEGAL )
+    # Constructor
+    # +arg+ if a String, the content is set to the String.  If a Text,
+    # the object is shallowly cloned.  
+    #
+    # +respect_whitespace+ (boolean, false) if true, whitespace is
+    # respected
+    #
+    # +parent+ (nil) if this is a Parent object, the parent
+    # will be set to this.  
+    #
+    # +raw+ (nil) This argument can be given three values.
+    # If true, then the value used to construct this object is expected to 
+    # contain no unescaped XML markup, and REXML will not change the text. If 
+    # this value is false, the string may contain any characters, and REXML will
+    # escape any and all defined entities whose values are contained in the
+    # text.  If this value is nil (the default), then the raw value of the 
+    # parent will be used as the raw value for this node.  If there is no raw
+    # value for the parent, and no value is supplied, the default is false.
+    #   Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
+    #   Text.new( "<&", false, nil, true )  #-> raises RuntimeError (illegal character)
+    #   Text.new( "&lt;&amp;", false, nil, true )  #-> "&lt;&amp;"
+    #   # Assume that the entity "s" is defined to be "sean"
+    #   # and that the entity    "r" is defined to be "russell"
+    #   Text.new( "sean russell" )          #-> "&s; &r;"
+    #   Text.new( "sean russell", false, nil, true ) #-> "sean russell"
+    #
+    # +entity_filter+ (nil) This can be an array of entities to match in the
+    # supplied text.  This argument is only useful if +raw+ is set to false.
+    #   Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
+    #   Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
+    # In the last example, the +entity_filter+ argument is ignored.
+    #
+    # +illegal+ INTERNAL USE ONLY
+    def initialize(arg, respect_whitespace=false, parent=nil, raw=nil, 
+      entity_filter=nil, illegal=ILLEGAL )

-			@raw = false
+      @raw = false

-			if parent
-				super( parent )
-				@raw = parent.raw 
-			else
-				@parent = nil
-			end
+      if parent
+        super( parent )
+        @raw = parent.raw 
+      else
+        @parent = nil
+      end

-			@raw = raw unless raw.nil?
-			@entity_filter = entity_filter
-			@normalized = @unnormalized = nil
+      @raw = raw unless raw.nil?
+      @entity_filter = entity_filter
+      @normalized = @unnormalized = nil

-			if arg.kind_of? String
-				@string = arg.clone
-				@string.squeeze!(" \n\t") unless respect_whitespace
-			elsif arg.kind_of? Text
-				@string = arg.to_s
-				@raw = arg.raw
-			elsif
-				raise Exception.new( "Illegal argument of type #{arg.type} for Text constructor (#{arg})" )
-			end
+      if arg.kind_of? String
+        @string = arg.clone
+        @string.squeeze!(" \n\t") unless respect_whitespace
+      elsif arg.kind_of? Text
+        @string = arg.to_s
+        @raw = arg.raw
+      elsif
+        raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})"
+      end

-			@string.gsub!( /\r\n?/, "\n" )
+      @string.gsub!( /\r\n?/, "\n" )

-			# check for illegal characters
-			if @raw
-				if @string =~ illegal
-					raise Exception.new(
-						"Illegal character '#{$1}' in raw string \"#{@string}\""
-					)
-				end
-			end
-		end
+      # check for illegal characters
+      if @raw
+        if @string =~ illegal
+          raise "Illegal character '#{$1}' in raw string \"#{@string}\""
+        end
+      end
+    end

-		def node_type
-			:text
-		end
+    def node_type
+      :text
+    end

-		def empty?
-			@string.size==0
-		end
+    def empty?
+      @string.size==0
+    end


-		def clone
-			return Text.new(self)
-		end
+    def clone
+      return Text.new(self)
+    end


-		# Appends text to this text node.  The text is appended in the +raw+ mode
-		# of this text node.
-		def <<( to_append )
-			@string << to_append.gsub( /\r\n?/, "\n" )
-		end
+    # Appends text to this text node.  The text is appended in the +raw+ mode
+    # of this text node.
+    def <<( to_append )
+      @string << to_append.gsub( /\r\n?/, "\n" )
+    end


-		# +other+ a String or a Text
-		# +returns+ the result of (to_s <=> arg.to_s)
-		def <=>( other )
-			to_s() <=> other.to_s
-		end
+    # +other+ a String or a Text
+    # +returns+ the result of (to_s <=> other.to_s)
+    def <=>( other )
+      to_s() <=> other.to_s
+    end

-		REFERENCE = /#{Entity::REFERENCE}/
-		# Returns the string value of this text node.  This string is always
-		# escaped, meaning that it is a valid XML text node string, and all
-		# entities that can be escaped, have been inserted.  This method respects
-		# the entity filter set in the constructor.
-		#   
-		#   # Assume that the entity "s" is defined to be "sean", and that the 
-		#   # entity "r" is defined to be "russell"
-		#   t = Text.new( "< & sean russell", false, nil, false, ['s'] ) 
-		#   t.to_s   #-> "&lt; &amp; &s; russell"
-		#   t = Text.new( "< & &s; russell", false, nil, false ) 
-		#   t.to_s   #-> "&lt; &amp; &s; russell"
-		#   u = Text.new( "sean russell", false, nil, true )
-		#   u.to_s   #-> "sean russell"
-		def to_s
-			return @string if @raw
-			return @normalized if @normalized
+    REFERENCE = /#{Entity::REFERENCE}/
+    # Returns the string value of this text node.  This string is always
+    # escaped, meaning that it is a valid XML text node string, and all
+    # entities that can be escaped, have been inserted.  This method respects
+    # the entity filter set in the constructor.
+    #   
+    #   # Assume that the entity "s" is defined to be "sean", and that the 
+    #   # entity "r" is defined to be "russell"
+    #   t = Text.new( "< & sean russell", false, nil, false, ['s'] ) 
+    #   t.to_s   #-> "&lt; &amp; &s; russell"
+    #   t = Text.new( "< & &s; russell", false, nil, false ) 
+    #   t.to_s   #-> "&lt; &amp; &s; russell"
+    #   u = Text.new( "sean russell", false, nil, true )
+    #   u.to_s   #-> "sean russell"
+    def to_s
+      return @string if @raw
+      return @normalized if @normalized

-			doctype = nil
-			if @parent
-				doc = @parent.document
-				doctype = doc.doctype if doc
-			end
+      doctype = nil
+      if @parent
+        doc = @parent.document
+        doctype = doc.doctype if doc
+      end

-			@normalized = Text::normalize( @string, doctype, @entity_filter )
-		end
+      @normalized = Text::normalize( @string, doctype, @entity_filter )
+    end

-		# Returns the string value of this text.  This is the text without
-		# entities, as it might be used programmatically, or printed to the
-		# console.  This ignores the 'raw' attribute setting, and any
-		# entity_filter.
-		#
-		#   # Assume that the entity "s" is defined to be "sean", and that the 
-		#   # entity "r" is defined to be "russell"
-		#   t = Text.new( "< & sean russell", false, nil, false, ['s'] ) 
-		#   t.string   #-> "< & sean russell"
-		#   t = Text.new( "< & &s; russell", false, nil, false )
-		#   t.string   #-> "< & sean russell"
-		#   u = Text.new( "sean russell", false, nil, true )
-		#   u.string   #-> "sean russell"
-		def value
-			@unnormalized if @unnormalized
-			doctype = nil
-			if @parent
-				doc = @parent.document
-				doctype = doc.doctype if doc
-			end
-			@unnormalized = Text::unnormalize( @string, doctype )
-		end
- 		
- 		def wrap(string, width, addnewline=false)
- 			# Recursivly wrap string at width.
- 			return string if string.length <= width
- 			place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
- 			if addnewline then
- 				return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
- 			else
- 				return string[0,place] + "\n" + wrap(string[place+1..-1], width)
- 			end
- 		end
+    def inspect
+      @string.inspect
+    end
+
+    # Returns the string value of this text.  This is the text without
+    # entities, as it might be used programmatically, or printed to the
+    # console.  This ignores the 'raw' attribute setting, and any
+    # entity_filter.
+    #
+    #   # Assume that the entity "s" is defined to be "sean", and that the 
+    #   # entity "r" is defined to be "russell"
+    #   t = Text.new( "< & sean russell", false, nil, false, ['s'] ) 
+    #   t.value   #-> "< & sean russell"
+    #   t = Text.new( "< & &s; russell", false, nil, false )
+    #   t.value   #-> "< & sean russell"
+    #   u = Text.new( "sean russell", false, nil, true )
+    #   u.value   #-> "sean russell"
+    def value
+      @unnormalized if @unnormalized
+      doctype = nil
+      if @parent
+        doc = @parent.document
+        doctype = doc.doctype if doc
+      end
+      @unnormalized = Text::unnormalize( @string, doctype )
+    end
+     
+     def wrap(string, width, addnewline=false)
+       # Recursively wrap string at width.
+       return string if string.length <= width
+       place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
+       if addnewline then
+         return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
+       else
+         return string[0,place] + "\n" + wrap(string[place+1..-1], width)
+       end
+     end

    # Sets the contents of this text node.  This expects the text to be 
    # unnormalized.  It returns self.
@ -188,26 +190,26 @@ module REXML
    #   e[0].value = "bar"    # <a>bar</a>
    #   e[0].value = "<a>"    # <a>&lt;a&gt;</a>
    def value=( val )
-			@string = val.gsub( /\r\n?/, "\n" )
+      @string = val.gsub( /\r\n?/, "\n" )
      @unnormalized = nil
      @normalized = nil
      @raw = false
    end
 
- 		def indent_text(string, level=1, style="\t", indentfirstline=true)
+     def indent_text(string, level=1, style="\t", indentfirstline=true)
      return string if level < 0
- 			new_string = ''
- 			string.each { |line|
- 				indent_string = style * level
- 				new_line = (indent_string + line).sub(/[\s]+$/,'')
- 				new_string << new_line
- 			}
- 			new_string.strip! unless indentfirstline
- 			return new_string
- 		end
+       new_string = ''
+       string.each { |line|
+         indent_string = style * level
+         new_line = (indent_string + line).sub(/[\s]+$/,'')
+         new_string << new_line
+       }
+       new_string.strip! unless indentfirstline
+       return new_string
+     end
 
-		def write( writer, indent=-1, transitive=false, ie_hack=false ) 
-			s = to_s()
+    def write( writer, indent=-1, transitive=false, ie_hack=false ) 
+      s = to_s()
      if not (@parent and @parent.whitespace) then
        s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
        if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0
@ -216,7 +218,7 @@ module REXML
        s.squeeze!(" \n\t") if @parent and !@parent.whitespace
      end
      writer << s
-		end
+    end

    # FIXME
    # This probably won't work properly
@ -226,111 +228,111 @@ module REXML
      return path
    end

-		# Writes out text, substituting special characters beforehand.
-		# +out+ A String, IO, or any other object supporting <<( String )
-		# +input+ the text to substitute and the write out
-		#
-		#   z=utf8.unpack("U*")
-		#   ascOut=""
-		#   z.each{|r|
-		#     if r <  0x100
-		#       ascOut.concat(r.chr)
-		#     else
-		#       ascOut.concat(sprintf("&#x%x;", r))
-		#     end
-		#   }
-		#   puts ascOut
-		def write_with_substitution out, input
-			copy = input.clone
-			# Doing it like this rather than in a loop improves the speed
-			copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
-			copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
-			copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
-			copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
-			copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
-			copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
-			out << copy
-		end
+    # Writes out text, substituting special characters beforehand.
+    # +out+ A String, IO, or any other object supporting <<( String )
+    # +input+ the text to substitute and then write out
+    #
+    #   z=utf8.unpack("U*")
+    #   ascOut=""
+    #   z.each{|r|
+    #     if r <  0x100
+    #       ascOut.concat(r.chr)
+    #     else
+    #       ascOut.concat(sprintf("&#x%x;", r))
+    #     end
+    #   }
+    #   puts ascOut
+    def write_with_substitution out, input
+      copy = input.clone
+      # Doing it like this rather than in a loop improves the speed
+      copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
+      copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
+      copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
+      copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
+      copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
+      copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
+      out << copy
+    end

-		# Reads text, substituting entities
-		def Text::read_with_substitution( input, illegal=nil )
-			copy = input.clone
+    # Reads text, substituting entities
+    def Text::read_with_substitution( input, illegal=nil )
+      copy = input.clone

-			if copy =~ illegal
-				raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
-			end if illegal
-			
-			copy.gsub!( /\r\n?/, "\n" )
-			if copy.include? ?&
-				copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
-				copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
-				copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
-				copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
-				copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
-				copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {|m|
-					m=$1
-					#m='0' if m==''
-					m = "0#{m}" if m[0] == ?x
-					[Integer(m)].pack('U*')
-				}
-			end
-			copy
-		end
+      if copy =~ illegal
+        raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
+      end if illegal
+      
+      copy.gsub!( /\r\n?/, "\n" )
+      if copy.include? ?&
+        copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
+        copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
+        copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
+        copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
+        copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
+        copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {|m|
+          m=$1
+          #m='0' if m==''
+          m = "0#{m}" if m[0] == ?x
+          [Integer(m)].pack('U*')
+        }
+      end
+      copy
+    end

-		EREFERENCE = /&(?!#{Entity::NAME};)/
-		# Escapes all possible entities
-		def Text::normalize( input, doctype=nil, entity_filter=nil )
-			copy = input.clone
-			# Doing it like this rather than in a loop improves the speed
-			if doctype
-				copy = copy.gsub( EREFERENCE, '&amp;' )
-				doctype.entities.each_value do |entity|
-					copy = copy.gsub( entity.value, 
-						"&#{entity.name};" ) if entity.value and 
-							not( entity_filter and entity_filter.include?(entity) )
-				end
-			else
-				copy = copy.gsub( EREFERENCE, '&amp;' )
-				DocType::DEFAULT_ENTITIES.each_value do |entity|
-					copy = copy.gsub(entity.value, "&#{entity.name};" )
-				end
-			end
-			copy
-		end
+    EREFERENCE = /&(?!#{Entity::NAME};)/
+    # Escapes all possible entities
+    def Text::normalize( input, doctype=nil, entity_filter=nil )
+      copy = input.clone
+      # Doing it like this rather than in a loop improves the speed
+      if doctype
+        copy = copy.gsub( EREFERENCE, '&amp;' )
+        doctype.entities.each_value do |entity|
+          copy = copy.gsub( entity.value, 
+            "&#{entity.name};" ) if entity.value and 
+              not( entity_filter and entity_filter.include?(entity) )
+        end
+      else
+        copy = copy.gsub( EREFERENCE, '&amp;' )
+        DocType::DEFAULT_ENTITIES.each_value do |entity|
+          copy = copy.gsub(entity.value, "&#{entity.name};" )
+        end
+      end
+      copy
+    end

-		# Unescapes all possible entities
-		def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
-			rv = string.clone
-			rv.gsub!( /\r\n?/, "\n" )
-			matches = rv.scan( REFERENCE )
-			return rv if matches.size == 0
-			rv.gsub!( NUMERICENTITY ) {|m|
-				m=$1
-				m = "0#{m}" if m[0] == ?x
-				[Integer(m)].pack('U*')
-			}
-			matches.collect!{|x|x[0]}.compact!
-			if matches.size > 0
-				if doctype
-					matches.each do |entity_reference|
-						unless filter and filter.include?(entity_reference)
-							entity_value = doctype.entity( entity_reference )
-							re = /&#{entity_reference};/
-							rv.gsub!( re, entity_value ) if entity_value
-						end
-					end
-				else
-					matches.each do |entity_reference|
-						unless filter and filter.include?(entity_reference)
-							entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
-							re = /&#{entity_reference};/
-							rv.gsub!( re, entity_value.value ) if entity_value
-						end
-					end
-				end
-				rv.gsub!( /&amp;/, '&' )
-			end
-			rv
-		end
-	end
+    # Unescapes all possible entities
+    def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
+      rv = string.clone
+      rv.gsub!( /\r\n?/, "\n" )
+      matches = rv.scan( REFERENCE )
+      return rv if matches.size == 0
+      rv.gsub!( NUMERICENTITY ) {|m|
+        m=$1
+        m = "0#{m}" if m[0] == ?x
+        [Integer(m)].pack('U*')
+      }
+      matches.collect!{|x|x[0]}.compact!
+      if matches.size > 0
+        if doctype
+          matches.each do |entity_reference|
+            unless filter and filter.include?(entity_reference)
+              entity_value = doctype.entity( entity_reference )
+              re = /&#{entity_reference};/
+              rv.gsub!( re, entity_value ) if entity_value
+            end
+          end
+        else
+          matches.each do |entity_reference|
+            unless filter and filter.include?(entity_reference)
+              entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
+              re = /&#{entity_reference};/
+              rv.gsub!( re, entity_value.value ) if entity_value
+            end
+          end
+        end
+        rv.gsub!( /&amp;/, '&' )
+      end
+      rv
+    end
+  end
 end