ruby--ruby/lib/rexml/text.rb

require 'rexml/entity'

module REXML
	# Represents text nodes in an XML document
	class Text < Child
		include Comparable
		# The order in which the substitutions occur.  Each pattern pairs up, by
		# index, with the SUBSTITUTES entry at the same position;
		# write_with_substitution applies them one after another in this order.
		# The first pattern only matches an "&" that is not already the start of
		# something shaped like a reference (&name; or &#name;).
		SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
		# Replacement text for the SPECIALS pattern at the same index.
		SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
		# Characters which are substituted in written strings.  SLAICEPS and
		# SETUTITSBUS are "SPECIALS" and "SUBSTITUTES" spelled backwards: they
		# hold the reverse mapping used by Text::read_with_substitution.
		# SLAICEPS[i] is the literal character for the pattern SETUTITSBUS[i].
		# "&amp;" is the last pair (presumably so that a freshly produced "&" is
		# not picked up by the other named patterns).
		SLAICEPS = [ '<', '>', '"', "'", '&' ]
		SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]

		# If +raw+ is true, then REXML leaves the value alone
		attr_accessor :raw

		# Default +illegal+ pattern of the constructor, which applies it to raw
		# text only.  It matches "<", or an "&" that the negative lookahead does
		# not accept as the start of a reference (an Entity::NAME, or a numeric
		# character reference followed by ";").  Capture 1 is the offending
		# character; the constructor quotes it in its error message.
		ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
		# A numeric character reference, decimal or "x"-prefixed hexadecimal.
		# Leading zeros are skipped; capture 1 is the remaining number, including
		# the "x" when hexadecimal.  Used by Text::unnormalize.
		NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ 

		# Constructor
		# +arg+ if a String, the content is set to a private copy of the String
		# (the caller's object is never modified, so frozen strings are fine).
		# If a Text, that node's to_s value and +raw+ flag are copied.  Any
		# other kind of object raises an Exception.
		#
		# +respect_whitespace+ (boolean, false) if true, whitespace is
		# respected; otherwise runs of a repeated space, newline or tab in a
		# String +arg+ are squeezed down to a single character.
		#
		# +parent+ (nil) if this is a Parent object, the parent
		# will be set to this (it is handed to the superclass constructor), and
		# its +raw+ setting becomes the default for this node.
		#
		# +raw+ (nil) This argument can be given three values.
		# If true, then the value of used to construct this object is expected to
		# contain no unescaped XML markup, and REXML will not change the text. If
		# this value is false, the string may contain any characters, and REXML will
		# escape any and all defined entities whose values are contained in the
		# text.  If this value is nil (the default), then the raw value of the
		# parent will be used as the raw value for this node.  If there is no raw
		# value for the parent, and no value is supplied, the default is false.
		#   Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
		#   Text.new( "<&", false, nil, true )  #-> raises Exception
		#   Text.new( "&lt;&amp;", false, nil, true )  #-> "&lt;&amp;"
		#   # Assume that the entity "s" is defined to be "sean"
		#   # and that the entity    "r" is defined to be "russell"
		#   Text.new( "sean russell" )          #-> "&s; &r;"
		#   Text.new( "sean russell", false, nil, true ) #-> "sean russell"
		#
		# +entity_filter+ (nil) an array that is stored and later handed to
		# Text::normalize by to_s, which consults it when deciding which
		# document entities to substitute.  It has no effect on a raw node,
		# because to_s returns raw text untouched.
		#   Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
		#
		# +illegal+ INTERNAL USE ONLY.  The pattern used to reject raw text; its
		# first capture group is quoted in the error message.
		#
		# Raises Exception when +arg+ is neither a String nor a Text, or when the
		# node is raw and its text matches +illegal+.
		def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
			entity_filter=nil, illegal=ILLEGAL )

			@raw = false

			if parent
				super( parent )
				@raw = parent.raw
			else
				@parent = nil
			end

			@raw = raw unless raw.nil?
			@entity_filter = entity_filter
			@normalized = @unnormalized = nil

			if arg.kind_of? String
				# dup, not clone: clone keeps the frozen state, and the in-place
				# edits below raise on a frozen String.
				@string = arg.dup
				@string.squeeze!(" \n\t") unless respect_whitespace
			elsif arg.kind_of? Text
				# to_s may hand back the other node's own string object; copy it so
				# that editing this node can never change that one.
				@string = arg.to_s.dup
				@raw = arg.raw
			else
				raise Exception.new( "Illegal argument of type #{arg.class} for Text constructor (#{arg})" )
			end

			# Line ends are normalized: "\r\n" and a lone "\r" both become "\n".
			@string.gsub!( /\r\n?/, "\n" )

			# check for illegal characters
			if @raw
				if @string =~ illegal
					raise Exception.new(
						"Illegal character '#{$1}' in raw string \"#{@string}\""
					)
				end
			end
		end

		# Identifies the kind of node this is: always the Symbol :text.
		def node_type
			return :text
		end

		# True when the node's underlying string has no characters at all.
		def empty?
			@string.length.zero?
		end


		# Returns a new Text built by handing this node to Text.new, which
		# copies the node's to_s value and its +raw+ flag.
		def clone
			Text.new( self )
		end


		# Appends text to this text node.  The text is appended in the +raw+ mode
		# of this text node.
		#
		# +to_append+ the String to add.  "\r\n" and a lone "\r" in it are
		# converted to "\n" before it is appended.
		#
		# +returns+ the node's underlying string (the same object as before the
		# call, now longer).
		def <<( to_append )
			@string << to_append.gsub( /\r\n?/, "\n" )
			# The content changed, so the results memoized by to_s and value no
			# longer describe it; drop them so they are recomputed on demand.
			@normalized = @unnormalized = nil
			@string
		end


		# Orders this node against +other+ by comparing string forms.
		# +other+ a String or a Text (anything that responds to to_s)
		# +returns+ whatever String#<=> gives for (to_s <=> other.to_s)
		def <=>( other )
			mine = to_s
			theirs = other.to_s
			mine <=> theirs
		end

		# Pattern for references, compiled from the Entity::REFERENCE source.
		# Text::unnormalize scans text with it and treats the first capture of
		# each match as an entity name.
		REFERENCE = /#{Entity::REFERENCE}/
		# Returns the string value of this text node.  This string is always
		# escaped, meaning that it is a valid XML text node string, and all
		# entities that can be escaped, have been inserted.  This method respects
		# the entity filter set in the constructor.
		#   
		#   # Assume that the entity "s" is defined to be "sean", and that the 
		#   # entity "r" is defined to be "russell"
		#   t = Text.new( "< & sean russell", false, nil, false, ['s'] ) 
		#   t.to_s   #-> "&lt; &amp; &s; russell"
		#   t = Text.new( "< & &s; russell", false, nil, false ) 
		#   t.to_s   #-> "&lt; &amp; &s; russell"
		#   u = Text.new( "sean russell", false, nil, true )
		#   u.to_s   #-> "sean russell"
		def to_s
			return @string if @raw
			return @normalized if @normalized

			doctype = nil
			if @parent
				doc = @parent.document
				doctype = doc.doctype if doc
			end

			@normalized = Text::normalize( @string, doctype, @entity_filter )
		end

		# Returns the string value of this text.  This is the text without
		# entities, as it might be used programmatically, or printed to the
		# console.  This ignores the 'raw' attribute setting, and any
		# entity_filter.
		#
		# The stored string is expanded with Text::unnormalize, using the doctype
		# of the parent's document when there is one.  The result is computed
		# once and then returned from the cache on later calls.
		#
		#   # Assume that the entity "s" is defined to be "sean", and that the 
		#   # entity "r" is defined to be "russell"
		#   t = Text.new( "< & sean russell", false, nil, false, ['s'] ) 
		#   t.value   #-> "< & sean russell"
		#   t = Text.new( "< & &s; russell", false, nil, false )
		#   t.value   #-> "< & sean russell"
		#   u = Text.new( "sean russell", false, nil, true )
		#   u.value   #-> "sean russell"
		def value
			# Without the explicit return the cached value was evaluated and then
			# thrown away, so every call recomputed the expansion.
			return @unnormalized if @unnormalized
			doctype = nil
			if @parent
				doc = @parent.document
				doctype = doc.doctype if doc
			end
			@unnormalized = Text::unnormalize( @string, doctype )
		end
 		
 		def wrap(string, width, addnewline=false)
 			# Recursivly wrap string at width.
 			return string if string.length <= width
 			place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
 			if addnewline then
 				return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
 			else
 				return string[0,place] + "\n" + wrap(string[place+1..-1], width)
 			end
 		end
 
 		def indent(string, level=1, style="\t", indentfirstline=true)
      return string if level < 0
 			new_string = ''
 			string.each { |line|
 				indent_string = style * level
 				new_line = (indent_string + line).sub(/[\s]+$/,'')
 				new_string << new_line
 			}
 			new_string.strip! unless indentfirstline
 			return new_string
 		end
 
		# Appends this node's to_s text to +writer+ and returns the result of
		# that <tt><<</tt> call.
		#
		# +writer+ any object supporting <<( String )
		# +indent+ (-1) indentation level; only used when it is positive and
		# the text spans more than one line
		# +transitive+, +ie_hack+ accepted but not used by this method
		#
		# Unless the parent reports that whitespace is to be kept, the text is
		# first wrapped at 60 characters when the parent's context has
		# :wordwrap set to :all, and multi-line text is indented with the
		# parent's :indentstyle (two spaces when none is set).
		def write( writer, indent=-1, transitive=false, ie_hack=false )
			text = to_s
			unless @parent and @parent.whitespace
				text = wrap(text, 60, false) if @parent and @parent.context[:wordwrap] == :all
				custom_style = @parent ? @parent.context[:indentstyle] : nil
				indentstyle = custom_style.nil? ? '  ' : custom_style
				text = indent(text, indent, indentstyle, false) if text.count("\n") > 0 and indent > 0
			end
			writer << text
		end

		# Writes out text, substituting special characters beforehand.
		# +out+ A String, IO, or any other object supporting <<( String )
		# +input+ the text to substitute and then write out.  It is never
		# modified (and may be frozen): the substitutions run on a private copy.
		#
		# The SPECIALS patterns are applied in order, each match being replaced
		# by the SUBSTITUTES entry at the same index.  Returns whatever
		# <tt>out << text</tt> returns.
		#
		# Snippet kept from the original comment; it re-encodes UTF-8 text so
		# that code points of 0x100 and above become numeric references:
		#
		#   z=utf8.unpack("U*")
		#   ascOut=""
		#   z.each{|r|
		#     if r <  0x100
		#       ascOut.concat(r.chr)
		#     else
		#       ascOut.concat(sprintf("&#x%x;", r))
		#     end
		#   }
		#   puts ascOut
		def write_with_substitution out, input
			# dup rather than clone: clone keeps the frozen state, and gsub! raises
			# on a frozen String.
			copy = input.dup
			# Doing it like this rather than in a loop improves the speed
			copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
			copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
			copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
			copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
			copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
			copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
			out << copy
		end

		# Reads text, substituting entities.
		#
		# +input+ the text as read; it is not modified and may be frozen
		# +illegal+ (nil) optional pattern; when given and it matches +input+, a
		# ParseException naming the matched text is raised
		#
		# +returns+ a new String in which "\r\n" and lone "\r" have become "\n",
		# the five predefined references (&lt; &gt; &quot; &apos; &amp;) have
		# become their characters, and numeric character references (decimal, or
		# hexadecimal in either letter case) have become the UTF-8 character
		# with that code point.  Any other reference is left as it is.
		#
		# All references are replaced in a single pass, so text produced by one
		# replacement is never expanded again: "&amp;#60;" yields "&#60;".
		def self.read_with_substitution( input, illegal=nil )
			if illegal and input =~ illegal
				raise ParseException.new( "malformed text: Illegal character #$& in \"#{input}\"" )
			end

			copy = input.gsub( /\r\n?/, "\n" )
			return copy unless copy.include?( '&' )

			# The numeric alternative mirrors NUMERICENTITY: leading zeros are
			# skipped and the second capture keeps the "x" of a hexadecimal number.
			copy.gsub( /&(?:(lt|gt|quot|apos|amp)|#0*((?:\d+)|(?:x[a-fA-F0-9]+)));/ ) {
				name, number = $1, $2
				case name
				when 'lt'   then '<'
				when 'gt'   then '>'
				when 'quot' then '"'
				when 'apos' then "'"
				when 'amp'  then '&'
				else
					code = number[0, 1] == 'x' ? number[1..-1].hex : number.to_i
					[code].pack('U*')
				end
			}
		end

		# Matches an "&" that is not the start of a named reference (an
		# Entity::NAME followed by ";").  Text::normalize replaces every such
		# ampersand with "&amp;".
		EREFERENCE = /&(?!#{Entity::NAME};)/
		# Escapes all possible entities
		#
		# +input+ the text to escape; it is not modified and may be frozen
		# +doctype+ (nil) when given, the entities it declares are substituted;
		# otherwise DocType::DEFAULT_ENTITIES are used
		# +entity_filter+ (nil) entities to leave alone.  An entity is skipped
		# when the filter contains its name (the form Text::unnormalize uses for
		# its own filter) or the entity object itself (the only form the previous
		# implementation compared against).  Only consulted when +doctype+ is given.
		#
		# +returns+ a new String.  First every "&" that does not start a named
		# reference becomes "&amp;"; then each occurrence of an entity's value is
		# replaced by a reference to that entity.
		def self.normalize( input, doctype=nil, entity_filter=nil )
			# dup rather than clone: clone keeps the frozen state, and gsub! raises
			# on a frozen String.
			copy = input.dup
			copy.gsub!( EREFERENCE, '&amp;' )
			if doctype
				doctype.entities.each_value do |entity|
					value = entity.value
					# An empty value would match between every pair of characters.
					next if not value or value.to_s.empty?
					next if entity_filter and
						( entity_filter.include?(entity) or entity_filter.include?(entity.name) )
					copy.gsub!( value, "&#{entity.name};" )
				end
			else
				DocType::DEFAULT_ENTITIES.each_value do |entity|
					copy.gsub!(entity.value, "&#{entity.name};" )
				end
			end
			copy
		end

		# Unescapes all possible entities
		#
		# +string+ the escaped text; it is not modified and may be frozen
		# +doctype+ (nil) when given, entity values are looked up with its
		# +entity+ method; otherwise DocType::DEFAULT_ENTITIES is consulted
		# +filter+ (nil) names of entities that must be left unexpanded
		# +illegal+ (nil) accepted but not used by this method
		#
		# +returns+ a new String in which "\r\n" and lone "\r" have become "\n",
		# numeric character references have become UTF-8 characters, every named
		# reference with a known value has been replaced by that value, and
		# finally "&amp;" has become "&".  Text without any reference is returned
		# after the line-end conversion only.
		#
		# NOTE(review): the replacements run as separate passes, so text produced
		# by an earlier pass can be expanded again by a later one; confirm
		# whether callers rely on that before changing it.
		def self.unnormalize( string, doctype=nil, filter=nil, illegal=nil )
			rv = string.gsub( /\r\n?/, "\n" )
			matches = rv.scan( REFERENCE )
			return rv if matches.size == 0
			rv.gsub!( NUMERICENTITY ) {
				digits = $1
				code = digits[0, 1] == 'x' ? digits[1..-1].hex : digits.to_i
				[code].pack('U*')
			}
			# Keep the first capture of every match (the entity name); matches
			# without one, such as numeric references, are dropped.
			names = matches.collect { |captures| captures[0] }.compact
			if names.size > 0
				names.each do |entity_reference|
					next if filter and filter.include?(entity_reference)
					if doctype
						entity_value = doctype.entity( entity_reference )
					else
						default = DocType::DEFAULT_ENTITIES[ entity_reference ]
						entity_value = default && default.value
					end
					# A String pattern matches the name literally ("." stays a dot), and
					# the block form inserts the value verbatim, so backslashes in it are
					# not read as back-references.
					rv.gsub!( "&#{entity_reference};" ) { entity_value } if entity_value
				end
				rv.gsub!( /&amp;/, '&' )
			end
			rv
		end
	end
end
Initial revision git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3925 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2003-06-09 21:31:01 -04:00			`require 'rexml/entity'`

			`module REXML`
			`# Represents text nodes in an XML document`
			`class Text < Child`
			`include Comparable`
			`# The order in which the substitutions occur`
			`SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]`
			`SUBSTITUTES = ['&', '<', '>', '"', ''', ' ']`
			`# Characters which are substituted in written strings`
			`SLAICEPS = [ '<', '>', '"', "'", '&' ]`
			`SETUTITSBUS = [ /</u, />/u, /"/u, /'/u, /&/u ]`

			`# If +raw+ is true, then REXML leaves the value alone`
			`attr_accessor :raw`

			`ILLEGAL = /(<\|&(?!(#{Entity::NAME})\|(#0*((?:\d+)\|(?:x[a-fA-F0-9]+)));))/um`
			`NUMERICENTITY = /&#0*((?:\d+)\|(?:x[a-fA-F0-9]+));/`

			`# Constructor`
			`# +arg+ if a String, the content is set to the String. If a Text,`
			`# the object is shallowly cloned.`
			`#`
			`# +respect_whitespace+ (boolean, false) if true, whitespace is`
			`# respected`
			`#`
			`# +parent+ (nil) if this is a Parent object, the parent`
			`# will be set to this.`
			`#`
			`# +raw+ (nil) This argument can be given three values.`
			`# If true, then the value of used to construct this object is expected to`
			`# contain no unescaped XML markup, and REXML will not change the text. If`
			`# this value is false, the string may contain any characters, and REXML will`
			`# escape any and all defined entities whose values are contained in the`
			`# text. If this value is nil (the default), then the raw value of the`
			`# parent will be used as the raw value for this node. If there is no raw`
			`# value for the parent, and no value is supplied, the default is false.`
			`# Text.new( "<&", false, nil, false ) #-> "<&"`
			`# Text.new( "<&", false, nil, true ) #-> IllegalArgumentException`
			`# Text.new( "<&", false, nil, true ) #-> "<&"`
			`# # Assume that the entity "s" is defined to be "sean"`
			`# # and that the entity "r" is defined to be "russell"`
			`# Text.new( "sean russell" ) #-> "&s; &r;"`
			`# Text.new( "sean russell", false, nil, true ) #-> "sean russell"`
			`#`
			`# +entity_filter+ (nil) This can be an array of entities to match in the`
			`# supplied text. This argument is only useful if +raw+ is set to false.`
			`# Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"`
			`# Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"`
			`# In the last example, the +entity_filter+ argument is ignored.`
			`#`
			`# +pattern+ INTERNAL USE ONLY`
			`def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,`
			`entity_filter=nil, illegal=ILLEGAL )`

			`@raw = false`

			`if parent`
			`super( parent )`
			`@raw = parent.raw`
			`else`
			`@parent = nil`
			`end`

			`@raw = raw unless raw.nil?`
			`@entity_filter = entity_filter`
			`@normalized = @unnormalized = nil`

			`if arg.kind_of? String`
			`@string = arg.clone`
			`@string.squeeze!(" \n\t") unless respect_whitespace`
			`elsif arg.kind_of? Text`
			`@string = arg.to_s`
			`@raw = arg.raw`
			`elsif`
			`raise Exception.new( "Illegal argument of type #{arg.type} for Text constructor (#{arg})" )`
			`end`

			`@string.gsub!( /\r\n?/, "\n" )`

			`# check for illegal characters`
			`if @raw`
			`if @string =~ illegal`
			`raise Exception.new(`
			`"Illegal character '#{$1}' in raw string \"#{@string}\""`
			`)`
			`end`
			`end`
			`end`

			`def node_type`
			`:text`
			`end`

			`def empty?`
			`@string.size==0`
			`end`


			`def clone`
			`return Text.new(self)`
			`end`


			`# Appends text to this text node. The text is appended in the +raw+ mode`
			`# of this text node.`
			`def <<( to_append )`
			`@string << to_append.gsub( /\r\n?/, "\n" )`
			`end`


			`# +other+ a String or a Text`
			`# +returns+ the result of (to_s <=> arg.to_s)`
			`def <=>( other )`
			`to_s() <=> other.to_s`
			`end`

			`REFERENCE = /#{Entity::REFERENCE}/`
			`# Returns the string value of this text node. This string is always`
			`# escaped, meaning that it is a valid XML text node string, and all`
			`# entities that can be escaped, have been inserted. This method respects`
			`# the entity filter set in the constructor.`
			`#`
			`# # Assume that the entity "s" is defined to be "sean", and that the`
			`# # entity "r" is defined to be "russell"`
			`# t = Text.new( "< & sean russell", false, nil, false, ['s'] )`
			`# t.to_s #-> "< & &s; russell"`
			`# t = Text.new( "< & &s; russell", false, nil, false )`
			`# t.to_s #-> "< & &s; russell"`
			`# u = Text.new( "sean russell", false, nil, true )`
			`# u.to_s #-> "sean russell"`
			`def to_s`
			`return @string if @raw`
			`return @normalized if @normalized`

			`doctype = nil`
			`if @parent`
			`doc = @parent.document`
			`doctype = doc.doctype if doc`
			`end`

			`@normalized = Text::normalize( @string, doctype, @entity_filter )`
			`end`

			`# Returns the string value of this text. This is the text without`
			`# entities, as it might be used programmatically, or printed to the`
			`# console. This ignores the 'raw' attribute setting, and any`
			`# entity_filter.`
			`#`
			`# # Assume that the entity "s" is defined to be "sean", and that the`
			`# # entity "r" is defined to be "russell"`
			`# t = Text.new( "< & sean russell", false, nil, false, ['s'] )`
			`# t.string #-> "< & sean russell"`
			`# t = Text.new( "< & &s; russell", false, nil, false )`
			`# t.string #-> "< & sean russell"`
			`# u = Text.new( "sean russell", false, nil, true )`
			`# u.string #-> "sean russell"`
			`def value`
			`@unnormalized if @unnormalized`
			`doctype = nil`
			`if @parent`
			`doc = @parent.document`
			`doctype = doc.doctype if doc`
			`end`
			`@unnormalized = Text::unnormalize( @string, doctype )`
			`end`
@@ Fix for the XPath descendant* result set ordering bug @@ @@ SAX2 listener bug fixes @@ @@ Undid a code change that caused a 10x speed regression @@ @@ Indentation fixes, and a new word wrapping feature for text nodes was contributed by Devin Bayer (documentation forthcoming; see the change logs for now) @@ The XPath bug fix is really ugly and inefficient, but I spent two days hacking at it and this was the best I could come up with. The SAX2 listener fixes had to do with crashes in certain conditions, like when there was a carriage return at the end of a document Several people submitted patches for the speed regression; it is embarrassing how long it took me to get around to looking at this. To this day, I don't know where the offending code came from. Encoding fixes Added a contributed word wrapping option for text formatting. Devin Bayer contributed this. Here's his comment: "Setting :wordwrapping to :all, wordwraps all text nodes longer than 60 characters. Setting :indentstyle to aString, make aString used as indentation, instead of the default ' '. And as long as :respect_whitespace isn't set for the element, multiline text nodes will be indented." git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5696 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2004-02-13 17:40:14 -05:00
			`def wrap(string, width, addnewline=false)`
			`# Recursivly wrap string at width.`
			`return string if string.length <= width`
			`place = string.rindex(' ', width) # Position in string with last ' ' before cutoff`
			`if addnewline then`
			`return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)`
			`else`
			`return string[0,place] + "\n" + wrap(string[place+1..-1], width)`
			`end`
			`end`

			`def indent(string, level=1, style="\t", indentfirstline=true)`
			`return string if level < 0`
			`new_string = ''`
			`string.each { \|line\|`
			`indent_string = style * level`
			`new_line = (indent_string + line).sub(/[\s]+$/,'')`
			`new_string << new_line`
			`}`
			`new_string.strip! unless indentfirstline`
			`return new_string`
			`end`

Initial revision git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3925 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2003-06-09 21:31:01 -04:00			`def write( writer, indent=-1, transitive=false, ie_hack=false )`
@@ Fix for the XPath descendant* result set ordering bug @@ @@ SAX2 listener bug fixes @@ @@ Undid a code change that caused a 10x speed regression @@ @@ Indentation fixes, and a new word wrapping feature for text nodes was contributed by Devin Bayer (documentation forthcoming; see the change logs for now) @@ The XPath bug fix is really ugly and inefficient, but I spent two days hacking at it and this was the best I could come up with. The SAX2 listener fixes had to do with crashes in certain conditions, like when there was a carriage return at the end of a document Several people submitted patches for the speed regression; it is embarrassing how long it took me to get around to looking at this. To this day, I don't know where the offending code came from. Encoding fixes Added a contributed word wrapping option for text formatting. Devin Bayer contributed this. Here's his comment: "Setting :wordwrapping to :all, wordwraps all text nodes longer than 60 characters. Setting :indentstyle to aString, make aString used as indentation, instead of the default ' '. And as long as :respect_whitespace isn't set for the element, multiline text nodes will be indented." git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5696 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2004-02-13 17:40:14 -05:00			`s = to_s()`
			`if not (@parent and @parent.whitespace) then`
			`s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all`
			`if @parent and not @parent.context[:indentstyle].nil? then`
			`indentstyle = @parent.context[:indentstyle]`
			`else`
			`indentstyle = ' '`
			`end`
			`if s.count("\n") > 0 and indent > 0 then`
			`s = indent(s, indent, indentstyle, false)`
			`end`
			`end`
			`writer << s`
Initial revision git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3925 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2003-06-09 21:31:01 -04:00			`end`

			`# Writes out text, substituting special characters beforehand.`
			`# +out+ A String, IO, or any other object supporting <<( String )`
			`# +input+ the text to substitute and the write out`
			`#`
			`# z=utf8.unpack("U*")`
			`# ascOut=""`
			`# z.each{\|r\|`
			`# if r < 0x100`
			`# ascOut.concat(r.chr)`
			`# else`
			`# ascOut.concat(sprintf("&#x%x;", r))`
			`# end`
			`# }`
			`# puts ascOut`
			`def write_with_substitution out, input`
			`copy = input.clone`
			`# Doing it like this rather than in a loop improves the speed`
			`copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )`
			`copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )`
			`copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )`
			`copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )`
			`copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )`
			`copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )`
			`out << copy`
			`end`

			`# Reads text, substituting entities`
			`def Text::read_with_substitution( input, illegal=nil )`
			`copy = input.clone`

			`if copy =~ illegal`
			`raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )`
			`end if illegal`

			`copy.gsub!( /\r\n?/, "\n" )`
			`if copy.include? ?&`
			`copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )`
			`copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )`
			`copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )`
			`copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )`
			`copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )`
			`copy.gsub!( /&#0*((?:\d+)\|(?:x[a-f0-9]+));/ ) {\|m\|`
			`m=$1`
			`#m='0' if m==''`
			`m = "0#{m}" if m[0] == ?x`
			`[Integer(m)].pack('U*')`
			`}`
			`end`
			`copy`
			`end`

			`EREFERENCE = /&(?!#{Entity::NAME};)/`
			`# Escapes all possible entities`
			`def Text::normalize( input, doctype=nil, entity_filter=nil )`
			`copy = input.clone`
			`# Doing it like this rather than in a loop improves the speed`
			`if doctype`
			`copy.gsub!( EREFERENCE, '&' )`
			`doctype.entities.each_value do \|entity\|`
			`copy.gsub!( entity.value,`
			`"&#{entity.name};" ) if entity.value and`
			`not( entity_filter and entity_filter.include?(entity) )`
			`end`
			`else`
			`copy.gsub!( EREFERENCE, '&' )`
			`DocType::DEFAULT_ENTITIES.each_value do \|entity\|`
			`copy.gsub!(entity.value, "&#{entity.name};" )`
			`end`
			`end`
			`copy`
			`end`

			`# Unescapes all possible entities`
			`def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )`
			`rv = string.clone`
			`rv.gsub!( /\r\n?/, "\n" )`
* Added the lower-case Shift-JIS files to the manifest. The upper-case ones should be deprecated, but I need a Shift-JIS encoded XML file to test against, first. * Added support for maintaining external entity occurrences in DTDs * Deprecated the use of Document::DECLARATION. The new default declaration can be gotten with XMLDecl::default() * Refactored the encoding support code. It should be more robust now, and fixes a few bugs. * The XPath string() function now deals with Element nodes properly. * Serialization with Output objects now works as would be expected. * Various code cleanups, some reducing the number of warnings that Ruby 1.8.x produces with REXML. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5144 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2003-12-08 21:41:33 -05:00			`matches = rv.scan( REFERENCE )`
Initial revision git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3925 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2003-06-09 21:31:01 -04:00			`return rv if matches.size == 0`
			`rv.gsub!( NUMERICENTITY ) {\|m\|`
			`m=$1`
			`m = "0#{m}" if m[0] == ?x`
			`[Integer(m)].pack('U*')`
			`}`
			`matches.collect!{\|x\|x[0]}.compact!`
			`if matches.size > 0`
			`if doctype`
			`matches.each do \|entity_reference\|`
			`unless filter and filter.include?(entity_reference)`
			`entity_value = doctype.entity( entity_reference )`
			`re = /&#{entity_reference};/`
			`rv.gsub!( re, entity_value ) if entity_value`
			`end`
			`end`
			`else`
			`matches.each do \|entity_reference\|`
			`unless filter and filter.include?(entity_reference)`
			`entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]`
			`re = /&#{entity_reference};/`
			`rv.gsub!( re, entity_value.value ) if entity_value`
			`end`
			`end`
			`end`
			`rv.gsub!( /&/, '&' )`
			`end`
			`rv`
			`end`
			`end`
			`end`