mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Cross-ported the REXML changes from HEAD to the 1.8 branch.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@8486 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
d4d497dd86
commit
ed512acb2f
23 changed files with 1332 additions and 1036 deletions
|
@ -5,180 +5,182 @@ require 'rexml/doctype'
|
|||
require 'rexml/parseexception'
|
||||
|
||||
module REXML
|
||||
# Represents text nodes in an XML document
|
||||
class Text < Child
|
||||
include Comparable
|
||||
# The order in which the substitutions occur.  SPECIALS[i] is replaced by
# SUBSTITUTES[i]; the ampersand pattern comes first and skips anything that
# already looks like an entity or character reference.
SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
# Characters which are substituted in written strings.  SETUTITSBUS[i] (the
# escaped form) is replaced by SLAICEPS[i]; '&amp;' is deliberately last.
SLAICEPS = [ '<', '>', '"', "'", '&' ]
SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
|
||||
# Represents text nodes in an XML document
|
||||
class Text < Child
|
||||
include Comparable
|
||||
# The order in which the substitutions occur.  SPECIALS[i] is replaced by
# SUBSTITUTES[i]; the ampersand pattern comes first and skips anything that
# already looks like an entity or character reference.
SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
# Characters which are substituted in written strings.  SETUTITSBUS[i] (the
# escaped form) is replaced by SLAICEPS[i]; '&amp;' is deliberately last.
SLAICEPS = [ '<', '>', '"', "'", '&' ]
SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
|
||||
|
||||
# If +raw+ is true, then REXML leaves the value alone
|
||||
attr_accessor :raw
|
||||
# If +raw+ is true, then REXML leaves the value alone
|
||||
attr_accessor :raw
|
||||
|
||||
ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
|
||||
# Matches a numeric character reference, decimal ("&#65;") or hexadecimal
# ("&#x41;"), with leading zeros allowed.  Capture 1 is the number without
# the leading zeros, still carrying the "x" prefix for the hexadecimal form.
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
||||
ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
|
||||
# Matches a numeric character reference, decimal ("&#65;") or hexadecimal
# ("&#x41;"), with leading zeros allowed.  Capture 1 is the number without
# the leading zeros, still carrying the "x" prefix for the hexadecimal form.
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
||||
|
||||
# Constructor
|
||||
# +arg+ if a String, the content is set to the String. If a Text,
|
||||
# the object is shallowly cloned.
|
||||
#
|
||||
# +respect_whitespace+ (boolean, false) if true, whitespace is
|
||||
# respected
|
||||
#
|
||||
# +parent+ (nil) if this is a Parent object, the parent
|
||||
# will be set to this.
|
||||
#
|
||||
# +raw+ (nil) This argument can be given three values.
|
||||
# If true, then the value used to construct this object is expected to
|
||||
# contain no unescaped XML markup, and REXML will not change the text. If
|
||||
# this value is false, the string may contain any characters, and REXML will
|
||||
# escape any and all defined entities whose values are contained in the
|
||||
# text. If this value is nil (the default), then the raw value of the
|
||||
# parent will be used as the raw value for this node. If there is no raw
|
||||
# value for the parent, and no value is supplied, the default is false.
|
||||
# Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
|
||||
# Text.new( "<&", false, nil, true ) #-> IllegalArgumentException
|
||||
# Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
|
||||
# # Assume that the entity "s" is defined to be "sean"
|
||||
# # and that the entity "r" is defined to be "russell"
|
||||
# Text.new( "sean russell" ) #-> "&s; &r;"
|
||||
# Text.new( "sean russell", false, nil, true ) #-> "sean russell"
|
||||
#
|
||||
# +entity_filter+ (nil) This can be an array of entities to match in the
|
||||
# supplied text. This argument is only useful if +raw+ is set to false.
|
||||
# Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
|
||||
# Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
|
||||
# In the last example, the +entity_filter+ argument is ignored.
|
||||
#
|
||||
# +illegal+ INTERNAL USE ONLY
|
||||
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
|
||||
entity_filter=nil, illegal=ILLEGAL )
|
||||
# Constructor
|
||||
# +arg+ if a String, the content is set to the String. If a Text,
|
||||
# the object is shallowly cloned.
|
||||
#
|
||||
# +respect_whitespace+ (boolean, false) if true, whitespace is
|
||||
# respected
|
||||
#
|
||||
# +parent+ (nil) if this is a Parent object, the parent
|
||||
# will be set to this.
|
||||
#
|
||||
# +raw+ (nil) This argument can be given three values.
|
||||
# If true, then the value used to construct this object is expected to
|
||||
# contain no unescaped XML markup, and REXML will not change the text. If
|
||||
# this value is false, the string may contain any characters, and REXML will
|
||||
# escape any and all defined entities whose values are contained in the
|
||||
# text. If this value is nil (the default), then the raw value of the
|
||||
# parent will be used as the raw value for this node. If there is no raw
|
||||
# value for the parent, and no value is supplied, the default is false.
|
||||
# Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
|
||||
# Text.new( "<&", false, nil, true ) #-> IllegalArgumentException
|
||||
# Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
|
||||
# # Assume that the entity "s" is defined to be "sean"
|
||||
# # and that the entity "r" is defined to be "russell"
|
||||
# Text.new( "sean russell" ) #-> "&s; &r;"
|
||||
# Text.new( "sean russell", false, nil, true ) #-> "sean russell"
|
||||
#
|
||||
# +entity_filter+ (nil) This can be an array of entities to match in the
|
||||
# supplied text. This argument is only useful if +raw+ is set to false.
|
||||
# Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
|
||||
# Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
|
||||
# In the last example, the +entity_filter+ argument is ignored.
|
||||
#
|
||||
# +illegal+ INTERNAL USE ONLY
|
||||
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
|
||||
entity_filter=nil, illegal=ILLEGAL )
|
||||
|
||||
@raw = false
|
||||
@raw = false
|
||||
|
||||
if parent
|
||||
super( parent )
|
||||
@raw = parent.raw
|
||||
else
|
||||
@parent = nil
|
||||
end
|
||||
if parent
|
||||
super( parent )
|
||||
@raw = parent.raw
|
||||
else
|
||||
@parent = nil
|
||||
end
|
||||
|
||||
@raw = raw unless raw.nil?
|
||||
@entity_filter = entity_filter
|
||||
@normalized = @unnormalized = nil
|
||||
@raw = raw unless raw.nil?
|
||||
@entity_filter = entity_filter
|
||||
@normalized = @unnormalized = nil
|
||||
|
||||
if arg.kind_of? String
|
||||
@string = arg.clone
|
||||
@string.squeeze!(" \n\t") unless respect_whitespace
|
||||
elsif arg.kind_of? Text
|
||||
@string = arg.to_s
|
||||
@raw = arg.raw
|
||||
elsif
|
||||
raise Exception.new( "Illegal argument of type #{arg.type} for Text constructor (#{arg})" )
|
||||
end
|
||||
if arg.kind_of? String
|
||||
@string = arg.clone
|
||||
@string.squeeze!(" \n\t") unless respect_whitespace
|
||||
elsif arg.kind_of? Text
|
||||
@string = arg.to_s
|
||||
@raw = arg.raw
|
||||
elsif
|
||||
raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})"
|
||||
end
|
||||
|
||||
@string.gsub!( /\r\n?/, "\n" )
|
||||
@string.gsub!( /\r\n?/, "\n" )
|
||||
|
||||
# check for illegal characters
|
||||
if @raw
|
||||
if @string =~ illegal
|
||||
raise Exception.new(
|
||||
"Illegal character '#{$1}' in raw string \"#{@string}\""
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
# check for illegal characters
|
||||
if @raw
|
||||
if @string =~ illegal
|
||||
raise "Illegal character '#{$1}' in raw string \"#{@string}\""
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Identifies the kind of node this is; always returns the symbol :text.
def node_type
  :text
end
|
||||
# Identifies the kind of node this is; always returns the symbol :text.
def node_type
  :text
end
|
||||
|
||||
# True when the stored string has no characters at all.  Whitespace counts
# as content, so a node holding only blanks is not empty.
def empty?
  @string.empty?
end
|
||||
# True when the stored string has no characters at all.  Whitespace counts
# as content, so a node holding only blanks is not empty.
def empty?
  @string.empty?
end
|
||||
|
||||
|
||||
# Returns a new Text constructed from this node (Text.new with a Text
# argument copies its string and raw setting).
def clone
  Text.new(self)
end
|
||||
# Returns a new Text constructed from this node (Text.new with a Text
# argument copies its string and raw setting).
def clone
  Text.new(self)
end
|
||||
|
||||
|
||||
# Appends text to this text node.  The text is appended in the +raw+ mode
# of this text node.  CR and CRLF sequences in +to_append+ are converted
# to LF first.  Returns the updated internal string.
def <<( to_append )
  appended = @string << to_append.gsub( /\r\n?/, "\n" )
  # The memoised escaped/unescaped forms were computed from the old
  # contents, so drop them; they are rebuilt on the next to_s / value call.
  @normalized = @unnormalized = nil
  appended
end
|
||||
# Appends text to this text node.  The text is appended in the +raw+ mode
# of this text node.  CR and CRLF sequences in +to_append+ are converted
# to LF first.  Returns the updated internal string.
def <<( to_append )
  appended = @string << to_append.gsub( /\r\n?/, "\n" )
  # The memoised escaped/unescaped forms were computed from the old
  # contents, so drop them; they are rebuilt on the next to_s / value call.
  @normalized = @unnormalized = nil
  appended
end
|
||||
|
||||
|
||||
# +other+ a String or a Text
# +returns+ the result of (to_s <=> other.to_s)
def <=>( other )
  self.to_s <=> other.to_s
end
|
||||
# +other+ a String or a Text
# +returns+ the result of (to_s <=> other.to_s)
def <=>( other )
  self.to_s <=> other.to_s
end
|
||||
|
||||
REFERENCE = /#{Entity::REFERENCE}/
|
||||
# Returns the string value of this text node. This string is always
|
||||
# escaped, meaning that it is a valid XML text node string, and all
|
||||
# entities that can be escaped, have been inserted. This method respects
|
||||
# the entity filter set in the constructor.
|
||||
#
|
||||
# # Assume that the entity "s" is defined to be "sean", and that the
|
||||
# # entity "r" is defined to be "russell"
|
||||
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
||||
# t.to_s #-> "&lt; &amp; &s; russell"
|
||||
# t = Text.new( "< & &s; russell", false, nil, false )
|
||||
# t.to_s #-> "&lt; &amp; &s; russell"
|
||||
# u = Text.new( "sean russell", false, nil, true )
|
||||
# u.to_s #-> "sean russell"
|
||||
def to_s
|
||||
return @string if @raw
|
||||
return @normalized if @normalized
|
||||
REFERENCE = /#{Entity::REFERENCE}/
|
||||
# Returns the string value of this text node. This string is always
|
||||
# escaped, meaning that it is a valid XML text node string, and all
|
||||
# entities that can be escaped, have been inserted. This method respects
|
||||
# the entity filter set in the constructor.
|
||||
#
|
||||
# # Assume that the entity "s" is defined to be "sean", and that the
|
||||
# # entity "r" is defined to be "russell"
|
||||
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
||||
# t.to_s #-> "&lt; &amp; &s; russell"
|
||||
# t = Text.new( "< & &s; russell", false, nil, false )
|
||||
# t.to_s #-> "&lt; &amp; &s; russell"
|
||||
# u = Text.new( "sean russell", false, nil, true )
|
||||
# u.to_s #-> "sean russell"
|
||||
def to_s
|
||||
return @string if @raw
|
||||
return @normalized if @normalized
|
||||
|
||||
doctype = nil
|
||||
if @parent
|
||||
doc = @parent.document
|
||||
doctype = doc.doctype if doc
|
||||
end
|
||||
doctype = nil
|
||||
if @parent
|
||||
doc = @parent.document
|
||||
doctype = doc.doctype if doc
|
||||
end
|
||||
|
||||
@normalized = Text::normalize( @string, doctype, @entity_filter )
|
||||
end
|
||||
@normalized = Text::normalize( @string, doctype, @entity_filter )
|
||||
end
|
||||
|
||||
# Returns the string value of this text.  This is the text without
# entities, as it might be used programmatically, or printed to the
# console.  This ignores the 'raw' attribute setting, and any
# entity_filter.
#
#   # Assume that the entity "s" is defined to be "sean", and that the
#   # entity "r" is defined to be "russell"
#   t = Text.new( "< & sean russell", false, nil, false, ['s'] )
#   t.value   #-> "< & sean russell"
#   t = Text.new( "< & &s; russell", false, nil, false )
#   t.value   #-> "< & sean russell"
#   u = Text.new( "sean russell", false, nil, true )
#   u.value   #-> "sean russell"
#
# The result is memoised in @unnormalized; value= resets that cache when
# the contents are replaced.
def value
  # Serve the memoised result; without the explicit return the cached
  # string was computed but never actually used.
  return @unnormalized if @unnormalized
  doctype = nil
  if @parent
    # Entities declared in the owning document's doctype take part in
    # the expansion when this node is attached to a document.
    doc = @parent.document
    doctype = doc.doctype if doc
  end
  @unnormalized = Text::unnormalize( @string, doctype )
end
|
||||
|
||||
# Recursively wraps +string+ so that lines are broken at a space at or
# before column +width+.
#
# +string+ the text to wrap
# +width+ the preferred maximum line length
# +addnewline+ if true, a newline is also put in front of the wrapped
# text (only when wrapping actually takes place)
#
# A word longer than +width+ is kept whole and the break is made at the
# next space after it; text without any space is returned unchanged.
def wrap(string, width, addnewline=false)
  return string if string.length <= width
  # Prefer the last space at or before the cutoff.  rindex returns nil
  # when there is none, so fall back to the first space after the cutoff.
  place = string.rindex(' ', width) || string.index(' ', width)
  # No space anywhere: nothing to break on
  return string unless place
  wrapped = string[0,place] + "\n" + wrap(string[place+1..-1], width)
  addnewline ? "\n" + wrapped : wrapped
end
|
||||
# Debugging representation: the inspected form of the stored string.
def inspect
  @string.inspect
end
|
||||
|
||||
# Returns the string value of this text.  This is the text without
# entities, as it might be used programmatically, or printed to the
# console.  This ignores the 'raw' attribute setting, and any
# entity_filter.
#
#   # Assume that the entity "s" is defined to be "sean", and that the
#   # entity "r" is defined to be "russell"
#   t = Text.new( "< & sean russell", false, nil, false, ['s'] )
#   t.value   #-> "< & sean russell"
#   t = Text.new( "< & &s; russell", false, nil, false )
#   t.value   #-> "< & sean russell"
#   u = Text.new( "sean russell", false, nil, true )
#   u.value   #-> "sean russell"
#
# The result is memoised in @unnormalized; value= resets that cache when
# the contents are replaced.
def value
  # Serve the memoised result; without the explicit return the cached
  # string was computed but never actually used.
  return @unnormalized if @unnormalized
  doctype = nil
  if @parent
    # Entities declared in the owning document's doctype take part in
    # the expansion when this node is attached to a document.
    doc = @parent.document
    doctype = doc.doctype if doc
  end
  @unnormalized = Text::unnormalize( @string, doctype )
end
|
||||
|
||||
# Recursively wraps +string+ so that lines are broken at a space at or
# before column +width+.
#
# +string+ the text to wrap
# +width+ the preferred maximum line length
# +addnewline+ if true, a newline is also put in front of the wrapped
# text (only when wrapping actually takes place)
#
# A word longer than +width+ is kept whole and the break is made at the
# next space after it; text without any space is returned unchanged.
def wrap(string, width, addnewline=false)
  return string if string.length <= width
  # Prefer the last space at or before the cutoff.  rindex returns nil
  # when there is none, so fall back to the first space after the cutoff.
  place = string.rindex(' ', width) || string.index(' ', width)
  # No space anywhere: nothing to break on
  return string unless place
  wrapped = string[0,place] + "\n" + wrap(string[place+1..-1], width)
  addnewline ? "\n" + wrapped : wrapped
end
|
||||
|
||||
# Sets the contents of this text node. This expects the text to be
|
||||
# unnormalized. It returns self.
|
||||
|
@ -188,26 +190,26 @@ module REXML
|
|||
# e[0].value = "bar" # <a>bar</a>
|
||||
# e[0].value = "<a>" # <a>&lt;a&gt;</a>
|
||||
def value=( val )
|
||||
@string = val.gsub( /\r\n?/, "\n" )
|
||||
@string = val.gsub( /\r\n?/, "\n" )
|
||||
@unnormalized = nil
|
||||
@normalized = nil
|
||||
@raw = false
|
||||
end
|
||||
|
||||
def indent_text(string, level=1, style="\t", indentfirstline=true)
|
||||
def indent_text(string, level=1, style="\t", indentfirstline=true)
|
||||
return string if level < 0
|
||||
new_string = ''
|
||||
string.each { |line|
|
||||
indent_string = style * level
|
||||
new_line = (indent_string + line).sub(/[\s]+$/,'')
|
||||
new_string << new_line
|
||||
}
|
||||
new_string.strip! unless indentfirstline
|
||||
return new_string
|
||||
end
|
||||
new_string = ''
|
||||
string.each { |line|
|
||||
indent_string = style * level
|
||||
new_line = (indent_string + line).sub(/[\s]+$/,'')
|
||||
new_string << new_line
|
||||
}
|
||||
new_string.strip! unless indentfirstline
|
||||
return new_string
|
||||
end
|
||||
|
||||
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
||||
s = to_s()
|
||||
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
||||
s = to_s()
|
||||
if not (@parent and @parent.whitespace) then
|
||||
s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
|
||||
if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0
|
||||
|
@ -216,7 +218,7 @@ module REXML
|
|||
s.squeeze!(" \n\t") if @parent and !@parent.whitespace
|
||||
end
|
||||
writer << s
|
||||
end
|
||||
end
|
||||
|
||||
# FIXME
|
||||
# This probably won't work properly
|
||||
|
@ -226,111 +228,111 @@ module REXML
|
|||
return path
|
||||
end
|
||||
|
||||
# Writes out text, substituting special characters beforehand.
# +out+ A String, IO, or any other object supporting <<( String )
# +input+ the text to substitute and then write out; it is not modified
#
# Returns the result of <tt>out << escaped_copy</tt>.
#
# Unused sketch kept from the original comment: it emits every code point
# of a UTF-8 string that is >= 0x100 as a hexadecimal character reference.
#   z=utf8.unpack("U*")
#   ascOut=""
#   z.each{|r|
#     if r < 0x100
#       ascOut.concat(r.chr)
#     else
#       ascOut.concat(sprintf("&#x%x;", r))
#     end
#   }
#   puts ascOut
def write_with_substitution out, input
  # dup rather than clone: clone carries over the frozen flag, which
  # would make the in-place substitutions below raise for frozen input.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
  copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
  copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
  copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
  copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
  copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
  out << copy
end
|
||||
# Writes out text, substituting special characters beforehand.
# +out+ A String, IO, or any other object supporting <<( String )
# +input+ the text to substitute and then write out; it is not modified
#
# Returns the result of <tt>out << escaped_copy</tt>.
#
# Unused sketch kept from the original comment: it emits every code point
# of a UTF-8 string that is >= 0x100 as a hexadecimal character reference.
#   z=utf8.unpack("U*")
#   ascOut=""
#   z.each{|r|
#     if r < 0x100
#       ascOut.concat(r.chr)
#     else
#       ascOut.concat(sprintf("&#x%x;", r))
#     end
#   }
#   puts ascOut
def write_with_substitution out, input
  # dup rather than clone: clone carries over the frozen flag, which
  # would make the in-place substitutions below raise for frozen input.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
  copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
  copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
  copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
  copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
  copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
  out << copy
end
|
||||
|
||||
# Reads text, substituting entities
|
||||
def Text::read_with_substitution( input, illegal=nil )
|
||||
copy = input.clone
|
||||
# Reads text, substituting entities
|
||||
def Text::read_with_substitution( input, illegal=nil )
|
||||
copy = input.clone
|
||||
|
||||
if copy =~ illegal
|
||||
raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
|
||||
end if illegal
|
||||
|
||||
copy.gsub!( /\r\n?/, "\n" )
|
||||
if copy.include? ?&
|
||||
copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
|
||||
copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
|
||||
copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
|
||||
copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
|
||||
copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
|
||||
copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {|m|
|
||||
m=$1
|
||||
#m='0' if m==''
|
||||
m = "0#{m}" if m[0] == ?x
|
||||
[Integer(m)].pack('U*')
|
||||
}
|
||||
end
|
||||
copy
|
||||
end
|
||||
if copy =~ illegal
|
||||
raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
|
||||
end if illegal
|
||||
|
||||
copy.gsub!( /\r\n?/, "\n" )
|
||||
if copy.include? ?&
|
||||
copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
|
||||
copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
|
||||
copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
|
||||
copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
|
||||
copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
|
||||
copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {|m|
|
||||
m=$1
|
||||
#m='0' if m==''
|
||||
m = "0#{m}" if m[0] == ?x
|
||||
[Integer(m)].pack('U*')
|
||||
}
|
||||
end
|
||||
copy
|
||||
end
|
||||
|
||||
EREFERENCE = /&(?!#{Entity::NAME};)/
|
||||
# Escapes all possible entities
#
# +input+ the unescaped text; it is not modified
# +doctype+ (nil) if given, its entities are the ones substituted;
# otherwise DocType::DEFAULT_ENTITIES are used
# +entity_filter+ (nil) entities found in this collection are left
# unsubstituted; only consulted when +doctype+ is given
#
# Returns a new, escaped String.
def Text::normalize( input, doctype=nil, entity_filter=nil )
  # First escape every ampersand that EREFERENCE does not recognise as the
  # start of an entity reference.  gsub returns a new string, so +input+
  # is left untouched.
  copy = input.gsub( EREFERENCE, '&amp;' )
  if doctype
    doctype.entities.each_value do |entity|
      # Entities without a value have nothing to match against
      next unless entity.value
      # NOTE(review): the filter is tested against the entity object
      # itself, not its name -- confirm what callers put in the filter.
      next if entity_filter and entity_filter.include?(entity)
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  else
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  end
  copy
end
|
||||
EREFERENCE = /&(?!#{Entity::NAME};)/
|
||||
# Escapes all possible entities
#
# +input+ the unescaped text; it is not modified
# +doctype+ (nil) if given, its entities are the ones substituted;
# otherwise DocType::DEFAULT_ENTITIES are used
# +entity_filter+ (nil) entities found in this collection are left
# unsubstituted; only consulted when +doctype+ is given
#
# Returns a new, escaped String.
def Text::normalize( input, doctype=nil, entity_filter=nil )
  # First escape every ampersand that EREFERENCE does not recognise as the
  # start of an entity reference.  gsub returns a new string, so +input+
  # is left untouched.
  copy = input.gsub( EREFERENCE, '&amp;' )
  if doctype
    doctype.entities.each_value do |entity|
      # Entities without a value have nothing to match against
      next unless entity.value
      # NOTE(review): the filter is tested against the entity object
      # itself, not its name -- confirm what callers put in the filter.
      next if entity_filter and entity_filter.include?(entity)
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  else
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  end
  copy
end
|
||||
|
||||
# Unescapes all possible entities
#
# +string+ the escaped text; it is not modified
# +doctype+ (nil) if given, entity values are looked up through
# doctype.entity; otherwise DocType::DEFAULT_ENTITIES is used
# +filter+ (nil) entity names found in this collection are left escaped
# +illegal+ (nil) accepted for interface compatibility; not used here
#
# Returns a new String with CR/CRLF converted to LF, numeric character
# references replaced by the characters they name, known entity
# references expanded and, last of all, "&amp;" turned back into "&".
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  # dup rather than clone: clone carries over the frozen flag, which
  # would make the in-place substitutions below raise for frozen input.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE )
  return rv if matches.size == 0
  rv.gsub!( NUMERICENTITY ) {|m|
    m=$1
    # "x41" -> "0x41" so that Integer() parses it as hexadecimal
    m = "0#{m}" if m[0] == ?x
    [Integer(m)].pack('U*')
  }
  # Keep the first capture of every match and drop the nil entries
  # (presumably the numeric references, which carry no name -- confirm
  # against Entity::REFERENCE).
  matches.collect!{|x|x[0]}.compact!
  if matches.size > 0
    if doctype
      matches.each do |entity_reference|
        unless filter and filter.include?(entity_reference)
          entity_value = doctype.entity( entity_reference )
          re = /&#{entity_reference};/
          rv.gsub!( re, entity_value ) if entity_value
        end
      end
    else
      matches.each do |entity_reference|
        unless filter and filter.include?(entity_reference)
          entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
          re = /&#{entity_reference};/
          rv.gsub!( re, entity_value.value ) if entity_value
        end
      end
    end
    # Ampersands go last so that text such as "&amp;lt;" ends up as the
    # literal "&lt;" instead of being expanded a second time.
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
|
||||
end
|
||||
# Unescapes all possible entities
#
# +string+ the escaped text; it is not modified
# +doctype+ (nil) if given, entity values are looked up through
# doctype.entity; otherwise DocType::DEFAULT_ENTITIES is used
# +filter+ (nil) entity names found in this collection are left escaped
# +illegal+ (nil) accepted for interface compatibility; not used here
#
# Returns a new String with CR/CRLF converted to LF, numeric character
# references replaced by the characters they name, known entity
# references expanded and, last of all, "&amp;" turned back into "&".
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  # dup rather than clone: clone carries over the frozen flag, which
  # would make the in-place substitutions below raise for frozen input.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE )
  return rv if matches.size == 0
  rv.gsub!( NUMERICENTITY ) {|m|
    m=$1
    # "x41" -> "0x41" so that Integer() parses it as hexadecimal
    m = "0#{m}" if m[0] == ?x
    [Integer(m)].pack('U*')
  }
  # Keep the first capture of every match and drop the nil entries
  # (presumably the numeric references, which carry no name -- confirm
  # against Entity::REFERENCE).
  matches.collect!{|x|x[0]}.compact!
  if matches.size > 0
    if doctype
      matches.each do |entity_reference|
        unless filter and filter.include?(entity_reference)
          entity_value = doctype.entity( entity_reference )
          re = /&#{entity_reference};/
          rv.gsub!( re, entity_value ) if entity_value
        end
      end
    else
      matches.each do |entity_reference|
        unless filter and filter.include?(entity_reference)
          entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
          re = /&#{entity_reference};/
          rv.gsub!( re, entity_value.value ) if entity_value
        end
      end
    end
    # Ampersands go last so that text such as "&amp;lt;" ends up as the
    # literal "&lt;" instead of being expanded a second time.
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue