mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Merges upstream changes for REXML v3.1.7
http://www.germane-software.com/repos/rexml/tags/3.1.7 r1278@bean: ser | 2007-06-07 00:53:06 -0400 Fixed a double-encoding bug. This was a regression, related to ticket:48. r1292@bean: ser | 2007-07-25 08:19:36 -0400 r1279@bean: ser | 2007-06-09 23:19:02 -0400 Fixes ticket:89 -- encoding CP-1252 was broken. ISO-8859-15 had the same problem. Also in this patch is a fix to merge.rb (unused, but it should at least contain no errors), and a unit test for ticket:88. r1293@bean: ser | 2007-07-25 08:19:37 -0400 r1281@bean: ser | 2007-07-24 11:08:48 -0400 Addresses ticket:85 This is a major rewrite of the XML formatting code. The XML writers have all been extracted out of the classes and put into their own class containers. This makes writing parsers easier, and cleaner. There are three formatters, which correspond to the previous three XML writing modes: REXML::Formatters::Default Prints the XML document exactly as it was parsed REXML::Formatters::Pretty Pretty prints the XML document, destroying whitespace in the document REXML::Formatters::Transitive Pretty prints the XML document, preserving whitespace All of the write() functions have been deprecated (some are still used, but these will also go away) except the write() function on Document, which is left for convenience. To pretty print an XML document the canonical way: formatter = REXML::Formatters::Pretty.new( 5 ) # indent by 5 spaces formatter.write( document, output ) r1294@bean: ser | 2007-07-25 08:19:38 -0400 r1283@bean: ser | 2007-07-24 19:53:30 -0400 This goes with the previous commit. r1295@bean: ser | 2007-07-25 08:19:39 -0400 r1285@bean: ser | 2007-07-24 20:02:07 -0400 And THIS goes with the previous two patches. Dammit. r1296@bean: ser | 2007-07-25 08:19:40 -0400 r1287@bean: ser | 2007-07-24 20:12:25 -0400 Applied patch from Jeff Barczewski. Note that this changes what the values of the name and IDs are from the previous behavior -- the values no longer include the quotes. 
This is the correct behavior, so I'm leaving it in, but it is not backwards compatible. Also fixes the serializer so that it outputs the doctype in a correct format (needed as a result of this change). r1297@bean: ser | 2007-07-25 08:38:38 -0400 Version update git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12844 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
fead3c93e7
commit
1d8c98a486
23 changed files with 1689 additions and 1328 deletions
|
@ -18,16 +18,32 @@ module REXML
|
|||
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
||||
|
||||
# Constructor.
|
||||
# FIXME: The parser doesn't catch illegal characters in attributes
|
||||
#
|
||||
# first::
|
||||
# Either: an Attribute, which this new attribute will become a
|
||||
# clone of; or a String, which is the name of this attribute
|
||||
# second::
|
||||
# If +first+ is an Attribute, then this may be an Element, or nil.
|
||||
# If nil, then the Element parent of this attribute is the parent
|
||||
# of the +first+ Attribute. If the first argument is a String,
|
||||
# then this must also be a String, and is the content of the attribute.
|
||||
# If this is the content, it must be fully normalized (contain no
|
||||
# illegal characters).
|
||||
# parent::
|
||||
# Ignored unless +first+ is a String; otherwise, may be the Element
|
||||
# parent of this attribute, or nil.
|
||||
#
|
||||
#
|
||||
# Attribute.new( attribute_to_clone )
|
||||
# Attribute.new( source )
|
||||
# Attribute.new( attribute_to_clone, parent_element )
|
||||
# Attribute.new( "attr", "attr_value" )
|
||||
# Attribute.new( "attr", "attr_value", parent_element )
|
||||
def initialize( first, second=nil, parent=nil )
|
||||
@normalized = @unnormalized = @element = nil
|
||||
if first.kind_of? Attribute
|
||||
self.name = first.expanded_name
|
||||
@value = first.value
|
||||
@unnormalized = first.value
|
||||
if second.kind_of? Element
|
||||
@element = second
|
||||
else
|
||||
|
@ -36,7 +52,7 @@ module REXML
|
|||
elsif first.kind_of? String
|
||||
@element = parent if parent.kind_of? Element
|
||||
self.name = first
|
||||
@value = second.to_s
|
||||
@normalized = second.to_s
|
||||
else
|
||||
raise "illegal argument #{first.class.name} to Attribute constructor"
|
||||
end
|
||||
|
@ -72,7 +88,7 @@ module REXML
|
|||
# Returns true if other is an Attribute and has the same name and value,
|
||||
# false otherwise.
|
||||
def ==( other )
|
||||
other.kind_of?(Attribute) and other.name==name and other.value==@value
|
||||
other.kind_of?(Attribute) and other.name==name and other.value==value
|
||||
end
|
||||
|
||||
# Creates (and returns) a hash from both the name and value
|
||||
|
@ -87,8 +103,12 @@ module REXML
|
|||
# b = Attribute.new( "ns:x", "y" )
|
||||
# b.to_string # -> "ns:x='y'"
|
||||
# Returns this attribute as it appears in XML source: the expanded name,
# an equals sign, and the quoted value from to_s.
#
# Double quotes are used when the owning element's context has
# :attribute_quote set to :quote; otherwise single quotes are used. Any
# occurrence of the chosen quote character inside the value is replaced
# with its predefined XML entity (&quot; or &apos;) so the output stays
# well-formed.
def to_string
  if @element && @element.context && @element.context[:attribute_quote] == :quote
    %Q^#@expanded_name="#{to_s.gsub(/"/, '&quot;')}"^
  else
    "#@expanded_name='#{to_s.gsub(/'/, '&apos;')}'"
  end
end
|
||||
|
||||
# Returns the attribute value, with entities replaced
|
||||
def to_s
|
||||
|
@ -100,8 +120,9 @@ module REXML
|
|||
doctype = doc.doctype if doc
|
||||
end
|
||||
|
||||
@normalized = Text::normalize( @unnormalized, doctype )
|
||||
@unnormalized = nil
|
||||
@normalized = Text::normalize( @value, doctype )
|
||||
@normalized
|
||||
end
|
||||
|
||||
# Returns the UNNORMALIZED value of this attribute. That is, entities
|
||||
|
@ -113,8 +134,9 @@ module REXML
|
|||
doc = @element.document
|
||||
doctype = doc.doctype if doc
|
||||
end
|
||||
@unnormalized = Text::unnormalize( @normalized, doctype )
|
||||
@normalized = nil
|
||||
@unnormalized = Text::unnormalize( @value, doctype )
|
||||
@unnormalized
|
||||
end
|
||||
|
||||
# Returns a copy of this attribute
|
||||
|
|
|
@ -39,31 +39,26 @@ module REXML
|
|||
@string
|
||||
end
|
||||
|
||||
# == DEPRECATED
|
||||
# See the rexml/formatters package
|
||||
#
|
||||
# Generates XML output of this object
|
||||
#
|
||||
# output::
|
||||
# Where to write the string. Defaults to $stdout
|
||||
# indent::
|
||||
# An integer. If -1, no indenting will be used; otherwise, the
|
||||
# indentation will be this number of spaces, and children will be
|
||||
# indented an additional amount. Defaults to -1.
|
||||
# The amount to indent this node by
|
||||
# transitive::
|
||||
# If transitive is true and indent is >= 0, then the output will be
|
||||
# pretty-printed in such a way that the added whitespace does not affect
|
||||
# the absolute *value* of the document -- that is, it leaves the value
|
||||
# and number of Text nodes in the document unchanged.
|
||||
# Ignored
|
||||
# ie_hack::
|
||||
# Internet Explorer is the worst piece of crap to have ever been
|
||||
# written, with the possible exception of Windows itself. Since IE is
|
||||
# unable to parse proper XML, we have to provide a hack to generate XML
|
||||
# that IE's limited abilities can handle. This hack inserts a space
|
||||
# before the /> on empty tags.
|
||||
# Ignored
|
||||
#
|
||||
# _Examples_
|
||||
# c = CData.new( " Some text " )
|
||||
# c.write( $stdout ) #-> <![CDATA[ Some text ]]>
|
||||
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
|
||||
#indent( output, indent ) unless transitive
|
||||
Kernel.warn( "#{self.class.name}.write is deprecated" )
|
||||
indent( output, indent )
|
||||
output << START
|
||||
output << @string
|
||||
output << STOP
|
||||
|
|
|
@ -34,6 +34,9 @@ module REXML
|
|||
Comment.new self
|
||||
end
|
||||
|
||||
# == DEPRECATED
|
||||
# See REXML::Formatters
|
||||
#
|
||||
# output::
|
||||
# Where to write the string
|
||||
# indent::
|
||||
|
@ -45,6 +48,7 @@ module REXML
|
|||
# ie_hack::
|
||||
# Needed for conformity to the child API, but not used by this class.
|
||||
def write( output, indent=-1, transitive=false, ie_hack=false )
|
||||
Kernel.warn("Comment.write is deprecated. See REXML::Formatters")
|
||||
indent( output, indent )
|
||||
output << START
|
||||
output << @string
|
||||
|
|
|
@ -98,38 +98,30 @@ module REXML
|
|||
# output::
|
||||
# Where to write the string
|
||||
# indent::
|
||||
# An integer. If -1, no indenting will be used; otherwise, the
|
||||
# An integer. If -1, no indentation will be used; otherwise, the
|
||||
# indentation will be this number of spaces, and children will be
|
||||
# indented an additional amount.
|
||||
# transitive::
|
||||
# If transitive is true and indent is >= 0, then the output will be
|
||||
# pretty-printed in such a way that the added whitespace does not affect
|
||||
# the absolute *value* of the document -- that is, it leaves the value
|
||||
# and number of Text nodes in the document unchanged.
|
||||
# Ignored
|
||||
# ie_hack::
|
||||
# Internet Explorer is the worst piece of crap to have ever been
|
||||
# written, with the possible exception of Windows itself. Since IE is
|
||||
# unable to parse proper XML, we have to provide a hack to generate XML
|
||||
# that IE's limited abilities can handle. This hack inserts a space
|
||||
# before the /> on empty tags.
|
||||
#
|
||||
# Ignored
|
||||
def write( output, indent=0, transitive=false, ie_hack=false )
|
||||
f = REXML::Formatters::Default.new
|
||||
indent( output, indent )
|
||||
output << START
|
||||
output << ' '
|
||||
output << @name
|
||||
output << " #@external_id" if @external_id
|
||||
output << " #@long_name" if @long_name
|
||||
output << " #@uri" if @uri
|
||||
output << " #{@long_name.inspect}" if @long_name
|
||||
output << " #{@uri.inspect}" if @uri
|
||||
unless @children.empty?
|
||||
next_indent = indent + 1
|
||||
output << ' ['
|
||||
child = nil # speed
|
||||
@children.each { |child|
|
||||
output << "\n"
|
||||
child.write( output, next_indent )
|
||||
f.write( child, output )
|
||||
}
|
||||
#output << ' '*next_indent
|
||||
output << "\n]"
|
||||
end
|
||||
output << STOP
|
||||
|
@ -219,8 +211,10 @@ module REXML
|
|||
@string+'>'
|
||||
end
|
||||
|
||||
# == DEPRECATED
|
||||
# See REXML::Formatters
|
||||
#
|
||||
def write( output, indent )
|
||||
output << (' '*indent) if indent > 0
|
||||
output << to_s
|
||||
end
|
||||
end
|
||||
|
@ -264,7 +258,6 @@ module REXML
|
|||
end
|
||||
|
||||
def write( output, indent=-1 )
|
||||
output << (' '*indent) if indent > 0
|
||||
output << to_s
|
||||
end
|
||||
|
||||
|
|
|
@ -31,9 +31,6 @@ module REXML
|
|||
# to be sources of valid XML documents.
|
||||
# @param context if supplied, contains the context of the document;
|
||||
# this should be a Hash.
|
||||
# NOTE that I'm not sure what the context is for; I cloned it out of
|
||||
# the Electric XML API (in which it also seems to do nothing), and it
|
||||
# is now legacy. It may do something, someday... it may disappear.
|
||||
def initialize( source = nil, context = {} )
|
||||
super()
|
||||
@context = context
|
||||
|
@ -142,15 +139,54 @@ module REXML
|
|||
xml_decl().stand_alone?
|
||||
end
|
||||
|
||||
# Write the XML tree out, optionally with indent. This writes out the
|
||||
# entire XML document, including XML declarations, doctype declarations,
|
||||
# and processing instructions (if any are given).
|
||||
# Write the XML tree out. This writes the entire XML document, including
|
||||
# declarations and processing instructions.
|
||||
#
|
||||
# A controversial point is whether Document should always write the XML
|
||||
# declaration (<?xml version='1.0'?>) whether or not one is given by the
|
||||
# user (or source document). REXML does not write one if one was not
|
||||
# specified, because it adds unnecessary bandwidth to applications such
|
||||
# as XML-RPC.
|
||||
#
|
||||
# _Examples_
|
||||
# Document.new("<a><b/></a>").serialize
|
||||
#
|
||||
# output_string = ""
|
||||
# tr = Transitive.new( output_string )
|
||||
# Document.new("<a><b/></a>").serialize( tr )
|
||||
#
|
||||
# formatter::
|
||||
# One of the rexml/formatters classes. If none is given, then the Pretty
|
||||
# formatter will be used to dump the XML to the STDOUT.
|
||||
# Writes this document through a formatter.
#
# formatter::
#   An object responding to write( node, output ). When nil, a
#   REXML::Formatters::Pretty with its default settings is created.
#   NOTE(review): this assumes rexml/formatters/pretty has already been
#   loaded by the time the default is needed -- confirm against the
#   file's requires.
# output::
#   Where the formatter writes; defaults to $stdout. Any transcoding for
#   non-UTF-8 documents is left to the formatter.
#
# Returns whatever the formatter's write returns.
def serialize( formatter = nil, output = $stdout )
  formatter = REXML::Formatters::Pretty.new if formatter.nil?
  formatter.write( self, output )
end
|
||||
|
||||
# Write the XML tree out, optionally with indent. This writes out the
|
||||
# entire XML document, including XML declarations, doctype declarations,
|
||||
# and processing instructions (if any are given).
|
||||
#
|
||||
# A controversial point is whether Document should always write the XML
|
||||
# declaration (<?xml version='1.0'?>) whether or not one is given by the
|
||||
# user (or source document). REXML does not write one if one was not
|
||||
# specified, because it adds unnecessary bandwidth to applications such
|
||||
# as XML-RPC.
|
||||
#
|
||||
# See also the classes in the rexml/formatters package for the proper way
|
||||
# to change the default formatting of XML output
|
||||
#
|
||||
#
|
||||
# output::
|
||||
# output an object which supports '<< string'; this is where the
|
||||
|
@ -160,7 +196,7 @@ module REXML
|
|||
# indentation will be twice this number of spaces, and children will be
|
||||
# indented an additional amount. For a value of 3, every item will be
|
||||
# indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
|
||||
# transitive::
|
||||
# trans::
|
||||
# If transitive is true and indent is >= 0, then the output will be
|
||||
# pretty-printed in such a way that the added whitespace does not affect
|
||||
# the absolute *value* of the document -- that is, it leaves the value
|
||||
|
@ -171,14 +207,20 @@ module REXML
|
|||
# unable to parse proper XML, we have to provide a hack to generate XML
|
||||
# that IE's limited abilities can handle. This hack inserts a space
|
||||
# before the /> on empty tags. Defaults to false
|
||||
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
|
||||
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
|
||||
@children.each { |node|
|
||||
indent( output, indent ) if node.node_type == :element
|
||||
if node.write( output, indent, transitive, ie_hack )
|
||||
output << "\n" unless indent<0 or node == @children[-1]
|
||||
def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
|
||||
if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
|
||||
output = Output.new( output, xml_decl.encoding )
|
||||
end
|
||||
}
|
||||
formatter = if indent > -1
|
||||
if transitive
|
||||
REXML::Formatters::Transitive.new( indent, ie_hack )
|
||||
else
|
||||
REXML::Formatters::Pretty.new( indent, ie_hack )
|
||||
end
|
||||
else
|
||||
REXML::Formatters::Default.new( ie_hack )
|
||||
end
|
||||
formatter.write( self, output )
|
||||
end
|
||||
|
||||
|
||||
|
|
|
@ -295,14 +295,9 @@ module REXML
|
|||
def add_element element, attrs=nil
|
||||
raise "First argument must be either an element name, or an Element object" if element.nil?
|
||||
el = @elements.add(element)
|
||||
if attrs.kind_of? Hash
|
||||
attrs.each do |key, value|
|
||||
el.attributes[key]=value if key =~ /^xmlns:/
|
||||
end
|
||||
attrs.each do |key, value|
|
||||
el.attributes[key]=value if key !~ /^xmlns:/
|
||||
end
|
||||
end
|
||||
el.attributes[key]=Attribute.new(key,value,self)
|
||||
end if attrs.kind_of? Hash
|
||||
el
|
||||
end
|
||||
|
||||
|
@ -577,7 +572,8 @@ module REXML
|
|||
# value::
|
||||
# Required if +key+ is a String, and ignored if the first argument is
|
||||
# an Attribute. This is a String, and is used as the value
|
||||
# of the new Attribute.
|
||||
# of the new Attribute. This should be the unnormalized value of the
|
||||
# attribute (without entities).
|
||||
# Returns:: the Attribute added
|
||||
# e = Element.new 'e'
|
||||
# e.add_attribute( 'a', 'b' ) #-> <e a='b'/>
|
||||
|
@ -649,6 +645,9 @@ module REXML
|
|||
find_all { |child| child.kind_of? Text }.freeze
|
||||
end
|
||||
|
||||
# == DEPRECATED
|
||||
# See REXML::Formatters
|
||||
#
|
||||
# Writes out this element, and recursively, all children.
|
||||
# output::
|
||||
# output an object which supports '<< string'; this is where the
|
||||
|
@ -672,37 +671,17 @@ module REXML
|
|||
# doc.write( out ) #-> doc is written to the string 'out'
|
||||
# doc.write( $stdout ) #-> doc written to the console
|
||||
def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
|
||||
#print "ID:#{indent}"
|
||||
writer << "<#@expanded_name"
|
||||
|
||||
@attributes.each_attribute do |attr|
|
||||
writer << " "
|
||||
attr.write( writer, indent )
|
||||
end unless @attributes.empty?
|
||||
|
||||
if @children.empty?
|
||||
if transitive and indent>-1
|
||||
writer << "\n"
|
||||
indent( writer, indent )
|
||||
elsif ie_hack
|
||||
writer << " "
|
||||
end
|
||||
writer << "/"
|
||||
Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
|
||||
formatter = if indent > -1
|
||||
if transitive
|
||||
REXML::Formatters::Transitive.new( indent, ie_hack )
|
||||
else
|
||||
if transitive and indent>-1 and !@children[0].kind_of? Text
|
||||
writer << "\n"
|
||||
indent writer, indent+1
|
||||
REXML::Formatters::Pretty.new( indent, ie_hack )
|
||||
end
|
||||
writer << ">"
|
||||
write_children( writer, indent, transitive, ie_hack )
|
||||
writer << "</#{expanded_name}"
|
||||
else
|
||||
REXML::Formatters::Default.new( ie_hack )
|
||||
end
|
||||
if transitive and indent>-1 and !@children.empty?
|
||||
writer << "\n"
|
||||
indent -= 1 if next_sibling.nil?
|
||||
indent(writer, indent)
|
||||
end
|
||||
writer << ">"
|
||||
formatter.write( self, output )
|
||||
end
|
||||
|
||||
|
||||
|
@ -730,29 +709,6 @@ module REXML
|
|||
return if max>0 and num == max
|
||||
}
|
||||
end
|
||||
|
||||
# A private helper method
|
||||
def write_children( writer, indent, transitive, ie_hack )
|
||||
cr = (indent < 0) ? '' : "\n"
|
||||
if indent == -1
|
||||
each { |child| child.write( writer, indent, transitive, ie_hack ) }
|
||||
else
|
||||
next_indent = indent+1
|
||||
last_child=nil
|
||||
each { |child|
|
||||
unless child.kind_of? Text or last_child.kind_of? Text or transitive
|
||||
writer << cr
|
||||
indent(writer, next_indent)
|
||||
end
|
||||
child.write( writer, next_indent, transitive, ie_hack )
|
||||
last_child = child
|
||||
}
|
||||
unless last_child.kind_of? Text or transitive
|
||||
writer << cr
|
||||
indent( writer, indent )
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
########################################################################
|
||||
|
@ -1006,10 +962,11 @@ module REXML
|
|||
# name:: an XPath attribute name. Namespaces are relevant here.
|
||||
# Returns::
|
||||
# the String value of the matching attribute, or +nil+ if no
|
||||
# matching attribute was found.
|
||||
# matching attribute was found. This is the unnormalized value
|
||||
# (with entities expanded).
|
||||
#
|
||||
# doc = Document.new "<a foo:att='1' bar:att='2' att='3'/>"
|
||||
# doc.root.attributes['att'] #-> '3'
|
||||
# doc = Document.new "<a foo:att='1' bar:att='2' att='<'/>"
|
||||
# doc.root.attributes['att'] #-> '<'
|
||||
# doc.root.attributes['bar:att'] #-> '2'
|
||||
def [](name)
|
||||
attr = get_attribute(name)
|
||||
|
@ -1119,7 +1076,15 @@ module REXML
|
|||
delete attr
|
||||
return
|
||||
end
|
||||
value = Attribute.new(name, value) unless value.kind_of? Attribute
|
||||
element_document = @element.document
|
||||
unless value.kind_of? Attribute
|
||||
if @element.document and @element.document.doctype
|
||||
value = Text::normalize( value, @element.document.doctype )
|
||||
else
|
||||
value = Text::normalize( value, nil )
|
||||
end
|
||||
value = Attribute.new(name, value)
|
||||
end
|
||||
value.element = @element
|
||||
old_attr = fetch(value.name, nil)
|
||||
if old_attr.nil?
|
||||
|
|
|
@ -58,8 +58,8 @@ module REXML
|
|||
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
|
||||
return UTF_16 if /\A\xfe\xff/n =~ str
|
||||
return UNILE if /\A\xff\xfe/n =~ str
|
||||
str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
|
||||
return $1.upcase if $1
|
||||
str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
|
||||
return $3.upcase if $3
|
||||
return UTF_8
|
||||
end
|
||||
end
|
||||
|
|
|
@ -3,9 +3,15 @@
|
|||
#
|
||||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods = %q~
|
||||
register( "CP-1252" ) do |o|
|
||||
class << o
|
||||
alias encode encode_cp1252
|
||||
alias decode decode_cp1252
|
||||
end
|
||||
end
|
||||
|
||||
# Convert from UTF-8
|
||||
def encode content
|
||||
def encode_cp1252(content)
|
||||
array_utf8 = content.unpack('U*')
|
||||
array_enc = []
|
||||
array_utf8.each do |num|
|
||||
|
@ -54,7 +60,7 @@ module REXML
|
|||
end
|
||||
|
||||
# Convert to UTF-8
|
||||
def decode(str)
|
||||
def decode_cp1252(str)
|
||||
array_latin9 = str.unpack('C*')
|
||||
array_enc = []
|
||||
array_latin9.each do |num|
|
||||
|
@ -93,6 +99,5 @@ module REXML
|
|||
end
|
||||
array_enc.pack('U*')
|
||||
end
|
||||
~
|
||||
end
|
||||
end
|
||||
|
|
|
@ -3,9 +3,13 @@
|
|||
#
|
||||
module REXML
|
||||
module Encoding
|
||||
@@__REXML_encoding_methods = %q~
|
||||
register("ISO-8859-15") do |o|
|
||||
alias encode to_iso_8859_15
|
||||
alias decode from_iso_8859_15
|
||||
end
|
||||
|
||||
# Convert from UTF-8
|
||||
def to_iso_8859_15 content
|
||||
def to_iso_8859_15(content)
|
||||
array_utf8 = content.unpack('U*')
|
||||
array_enc = []
|
||||
array_utf8.each do |num|
|
||||
|
@ -64,6 +68,5 @@ module REXML
|
|||
end
|
||||
array_enc.pack('U*')
|
||||
end
|
||||
~
|
||||
end
|
||||
end
|
||||
|
|
|
@ -89,6 +89,12 @@ module REXML
|
|||
|
||||
# Write out a fully formed, correct entity definition (assuming the Entity
|
||||
# object itself is valid.)
|
||||
#
|
||||
# out::
|
||||
# An object implementing <TT><<</TT> to which the entity will be
|
||||
# output
|
||||
# indent::
|
||||
# *DEPRECATED* and ignored
|
||||
def write out, indent=-1
|
||||
out << '<!ENTITY '
|
||||
out << '% ' if @reference
|
||||
|
|
109
lib/rexml/formatters/default.rb
Normal file
109
lib/rexml/formatters/default.rb
Normal file
|
@ -0,0 +1,109 @@
|
|||
module REXML
  module Formatters
    # Writes nodes without adding any formatting of its own. Dispatches on
    # the node's class and appends the serialized form to an output object.
    class Default
      # Prints out the XML document with no formatting -- except if ie_hack is
      # set.
      #
      # ie_hack::
      #   If set to true, then inserts whitespace before the close of an empty
      #   tag, so that IE's bad XML parser doesn't choke.
      def initialize( ie_hack=false )
        @ie_hack = ie_hack
      end

      # Writes the node to some output.
      #
      # node::
      #   The node to write
      # output::
      #   A class implementing <TT><<</TT>.  Pass in an Output object to
      #   change the output encoding.
      #
      # Raises an Exception ("XML FORMATTING ERROR") when +node+ is not one
      # of the node classes handled below.
      def write( node, output )
        case node

        when Document
          # Wrap the target once so that everything written below is
          # transcoded to the encoding declared by the document.
          if node.xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
            output = Output.new( output, node.xml_decl.encoding )
          end
          write_document( node, output )

        when Element
          write_element( node, output )

        when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
             Attribute, AttlistDecl
          # These node types serialize themselves; -1 is passed as their
          # indent argument.
          node.write( output,-1 )

        when Instruction
          write_instruction( node, output )

        when DocType, XMLDecl
          node.write( output )

        when Comment
          write_comment( node, output )

        # CData must be tested before Text so that CDATA sections keep
        # their <![CDATA[ ]]> wrapper.
        when CData
          write_cdata( node, output )

        when Text
          write_text( node, output )

        else
          # The exception class is kept as-is so existing rescue clauses
          # continue to match.
          raise Exception, "XML FORMATTING ERROR"

        end
      end

      protected

      # Writes every child of the document in order.
      def write_document( node, output )
        node.children.each { |child| write( child, output ) }
      end

      # Writes the start tag with its attributes, then either closes the
      # tag as empty or writes all children followed by the end tag.
      def write_element( node, output )
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          node.children.each { |child| write( child, output ) }
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes the text node's to_s value unchanged.
      def write_text( node, output )
        output << node.to_s()
      end

      # Writes the comment body between Comment::START and Comment::STOP.
      def write_comment( node, output )
        output << Comment::START
        output << node.to_s
        output << Comment::STOP
      end

      # Writes the CDATA body between CData::START and CData::STOP.
      def write_cdata( node, output )
        output << CData::START
        output << node.to_s
        output << CData::STOP
      end

      # Writes a processing instruction. Any backslash is removed from
      # Instruction::START and Instruction::STOP before they are written.
      # The separating space and the content are only written when the
      # instruction has content, so a nil content no longer fails on
      # <TT>output << nil</TT>.
      def write_instruction( node, output )
        output << Instruction::START.sub(/\\/u, '')
        output << node.target
        content = node.content
        if content
          output << ' '
          output << content
        end
        output << Instruction::STOP.sub(/\\/u, '')
      end
    end
  end
end
|
134
lib/rexml/formatters/pretty.rb
Normal file
134
lib/rexml/formatters/pretty.rb
Normal file
|
@ -0,0 +1,134 @@
|
|||
require 'rexml/formatters/default'
|
||||
|
||||
module REXML
  module Formatters
    # Pretty-prints an XML document.  This destroys whitespace in text nodes
    # and will insert carriage returns and indentations.
    #
    # TODO: Add an option to print attributes on new lines
    class Pretty < Default

      # If compact is set to true, then the formatter will attempt to use as
      # little space as possible
      attr_accessor :compact
      # The width of a page.  Used for formatting text
      attr_accessor :width

      # Create a new pretty printer.
      #
      # indentation::
      #   An integer greater than 0.  The indentation of each level will be
      #   this number of spaces.  If this is < 1, the behavior of this object
      #   is undefined.  Defaults to 2.
      # ie_hack::
      #   If true, the printer will insert whitespace before closing empty
      #   tags, thereby allowing Internet Explorer's feeble XML parser to
      #   function. Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
        @width = 80
        @compact = false
      end

      protected

      # Writes an element indented to the current level. Children are
      # written one per line, one level deeper, unless compact mode managed
      # to place them on the same line as the start tag.
      def write_element(node, output)
        output << ' '*@level
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          unless write_compact_children( node, output )
            output << "\n"
            @level += @indentation
            node.children.each { |child|
              # Whitespace-only text carries no content once pretty-printed.
              next if child.kind_of?(Text) and child.to_s.strip.length == 0
              write( child, output )
              output << "\n"
            }
            @level -= @indentation
            output << ' '*@level
          end
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # If compact is set and all children are text, and the formatted text
      # plus the current indentation is shorter than the page width, writes
      # the text directly after the start tag and returns true. Otherwise
      # writes nothing and returns false.
      def write_compact_children( node, output )
        return false unless compact
        return false unless node.children.all? { |child| child.kind_of?(Text) }

        string = ""
        saved_level = @level
        # The inline text must not carry the per-line indentation that
        # write_text normally prepends.
        @level = 0
        begin
          node.children.each { |child| write( child, string ) }
        ensure
          @level = saved_level
        end

        return false unless string.length + @level < @width
        output << string
        true
      end

      # Writes text with every whitespace character turned into a space,
      # runs of spaces collapsed, wrapped to the page width and indented to
      # the current level. Works on a copy, so the string returned by the
      # node's to_s is left untouched.
      def write_text( node, output )
        s = node.to_s().gsub(/\s/,' ').squeeze(" ")
        s = wrap(s, @width - @level)
        s = indent_text(s, @level, " ", true)
        output << (' '*@level + s)
      end

      # Indents to the current level, then writes the comment.
      def write_comment( node, output)
        output << ' ' * @level
        super
      end

      # Indents to the current level, then writes the CDATA section.
      def write_cdata( node, output)
        output << ' ' * @level
        super
      end

      def write_document( node, output )
        # Ok, this is a bit odd.  All XML documents have an XML declaration,
        # but it may not write itself if the user didn't specifically add it,
        # either through the API or in the input document.  If it doesn't write
        # itself, then we don't need a carriage return... which makes this
        # logic more complex.
        node.children.each { |child|
          next if child == node.children[-1] and child.instance_of?(Text)
          unless child == node.children[0] or child.instance_of?(Text) or
            (child == node.children[1] and !node.children[0].writethis)
            output << "\n"
          end
          write( child, output )
        }
      end

      private

      # Inserts +level+ copies of +style+ after every newline in +string+.
      # The first line is left for the caller to indent; +indentfirstline+
      # is accepted but not used.
      def indent_text(string, level=1, style="\t", indentfirstline=true)
        return string if level < 0
        string.gsub(/\n/, "\n#{style*level}")
      end

      # Breaks +string+ into lines of at most +width+ characters, splitting
      # at the last space at or before the limit. A run with no usable
      # space is emitted unbroken rather than raising.
      def wrap(string, width)
        parts = []
        while string.length > width and place = string.rindex(' ', width)
          parts << string[0,place]
          string = string[place+1..-1]
        end
        parts << string
        parts.join("\n")
      end

    end
  end
end
|
||||
|
56
lib/rexml/formatters/transitive.rb
Normal file
56
lib/rexml/formatters/transitive.rb
Normal file
|
@ -0,0 +1,56 @@
|
|||
require 'rexml/formatters/pretty'
|
||||
|
||||
module REXML
  module Formatters
    # The Transitive formatter writes an XML document that parses to an
    # identical document as the source document.  This means that no extra
    # whitespace nodes are inserted, and whitespace within text nodes is
    # preserved.  Within these constraints, the document is pretty-printed,
    # with whitespace inserted into the metadata to introduce formatting.
    #
    # Note that this is only useful if the original XML is not already
    # formatted.  Since this formatter does not alter whitespace nodes, the
    # results of formatting already formatted XML will be odd.
    class Transitive < Default
      # indentation::
      #   The number of spaces added per nesting level.  Defaults to 2.
      # ie_hack::
      #   Accepted so this class can be constructed with the same
      #   arguments as Pretty.  It is stored but not consulted when
      #   writing elements: a line break and indentation are always
      #   written before the closing "/>".
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
      end

      protected

      # Writes an element, placing the line break and indentation inside
      # the tags (before ">" and "/>") rather than between nodes.
      def write_element( node, output )
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        output << "\n"
        output << ' '*@level
        if node.children.empty?
          output << "/"
        else
          output << ">"
          @level += @indentation
          node.children.each { |child|
            write( child, output )
          }
          @level -= @indentation
          output << "</#{node.expanded_name}"
          output << "\n"
          output << ' '*@level
        end
        output << ">"
      end

      # Text is written exactly as the node's to_s returns it.
      def write_text( node, output )
        output << node.to_s()
      end
    end
  end
end
|
|
@ -38,7 +38,11 @@ module REXML
|
|||
Instruction.new self
|
||||
end
|
||||
|
||||
# == DEPRECATED
|
||||
# See the rexml/formatters package
|
||||
#
|
||||
def write writer, indent=-1, transitive=false, ie_hack=false
|
||||
Kernel.warn( "#{self.class.name}.write is deprecated" )
|
||||
indent(writer, indent)
|
||||
writer << START.sub(/\\/u, '')
|
||||
writer << @target
|
||||
|
|
|
@ -18,10 +18,19 @@ module REXML
|
|||
@parent[ ind - 1 ]
|
||||
end
|
||||
|
||||
def to_s indent=-1
|
||||
rv = ""
|
||||
write rv,indent
|
||||
rv
|
||||
# indent::
|
||||
# *DEPRECATED* This parameter is now ignored. See the formatters in the
|
||||
# REXML::Formatters package for changing the output style.
|
||||
def to_s indent=nil
  # One output buffer shared by both paths.  It has to be assigned before
  # it is handed to the formatter: reading it first makes Ruby treat the
  # name as a method call and raise NameError.
  rv = ""
  if indent.nil?
    formatter = REXML::Formatters::Default.new
  else
    # Deprecated path: callers that still pass an indent get a warning and
    # pretty-printed output.
    Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
    formatter = REXML::Formatters::Pretty.new( indent )
  end
  # Formatters are called as write( node, output ); the indentation is
  # configured on the formatter itself, not passed per call.
  formatter.write( self, rv )
  return rv
end
|
||||
|
||||
def indent to, ind
|
||||
|
|
|
@ -53,7 +53,7 @@ module REXML
|
|||
# Matches the standalone pseudo-attribute of an XML declaration and captures
# its value in group 1.  Whitespace is optional on both sides of "=".
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
|
||||
|
||||
ENTITY_START = /^\s*<!ENTITY/
|
||||
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
|
||||
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
|
||||
ELEMENTDECL_START = /^\s*<!ELEMENT/um
|
||||
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
|
||||
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
|
||||
|
@ -217,10 +217,10 @@ module REXML
|
|||
close = md[2]
|
||||
identity =~ IDENTITY
|
||||
name = $1
|
||||
raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
|
||||
raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
|
||||
pub_sys = $2.nil? ? nil : $2.strip
|
||||
long_name = $3.nil? ? nil : $3.strip
|
||||
uri = $4.nil? ? nil : $4.strip
|
||||
long_name = $4.nil? ? nil : $4.strip
|
||||
uri = $6.nil? ? nil : $6.strip
|
||||
args = [ :start_doctype, name, pub_sys, long_name, uri ]
|
||||
if close == ">"
|
||||
@document_status = :after_doctype
|
||||
|
|
|
@ -94,6 +94,8 @@ module REXML
|
|||
when :end_document
|
||||
handle( :end_document )
|
||||
break
|
||||
when :start_doctype
|
||||
handle( :doctype, *event[1..-1])
|
||||
when :end_doctype
|
||||
context = context[1]
|
||||
when :start_element
|
||||
|
@ -167,7 +169,7 @@ module REXML
|
|||
when :entitydecl
|
||||
@entities[ event[1] ] = event[2] if event.size == 3
|
||||
handle( *event )
|
||||
when :processing_instruction, :comment, :doctype, :attlistdecl,
|
||||
when :processing_instruction, :comment, :attlistdecl,
|
||||
:elementdecl, :cdata, :notationdecl, :xmldecl
|
||||
handle( *event )
|
||||
end
|
||||
|
|
|
@ -551,7 +551,7 @@ module REXML
|
|||
end
|
||||
end
|
||||
#puts "BEFORE WITH '#{rest}'"
|
||||
rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/
|
||||
rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
|
||||
parsed.concat(n)
|
||||
return rest
|
||||
end
|
||||
|
|
|
@ -10,8 +10,8 @@
|
|||
#
|
||||
# Main page:: http://www.germane-software.com/software/rexml
|
||||
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
|
||||
# Version:: 3.1.5
|
||||
# Date:: 2006/250
|
||||
# Version:: 3.1.7
|
||||
# Date:: 2007/206
|
||||
#
|
||||
# This API documentation can be downloaded from the REXML home page, or can
|
||||
# be accessed online[http://www.germane-software.com/software/rexml_doc]
|
||||
|
@ -20,9 +20,10 @@
|
|||
# or can be accessed
|
||||
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
|
||||
module REXML
|
||||
COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>"
|
||||
DATE = "2006/250"
|
||||
VERSION = "3.1.5"
|
||||
COPYRIGHT = "Copyright © 2001-2007 Sean Russell <ser@germane-software.com>"
|
||||
DATE = "2007/206"
|
||||
VERSION = "3.1.7"
|
||||
REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip
|
||||
|
||||
Copyright = COPYRIGHT
|
||||
Version = VERSION
|
||||
|
|
|
@ -147,6 +147,9 @@ module REXML
|
|||
self.encoding = encoding
|
||||
elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
|
||||
self.encoding = check_encoding( str )
|
||||
elsif (0xef == str[0] && 0xbb == str[1])
|
||||
str += @source.read(1)
|
||||
str = '' if (0xbf == str[2])
|
||||
else
|
||||
@line_break = '>'
|
||||
end
|
||||
|
|
|
@ -211,16 +211,17 @@ module REXML
|
|||
return new_string
|
||||
end
|
||||
|
||||
# == DEPRECATED
|
||||
# See REXML::Formatters
|
||||
#
|
||||
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
||||
s = to_s()
|
||||
if not (@parent and @parent.whitespace) then
|
||||
s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
|
||||
if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0
|
||||
s = indent_text(s, indent, @parent.context[:indentstyle], false)
|
||||
Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
|
||||
formatter = if indent > -1
|
||||
REXML::Formatters::Pretty.new( indent )
|
||||
else
|
||||
REXML::Formatters::Default.new
|
||||
end
|
||||
s.squeeze!(" \n\t") if @parent and !@parent.whitespace
|
||||
end
|
||||
writer << s
|
||||
formatter.write( self, writer )
|
||||
end
|
||||
|
||||
# FIXME
|
||||
|
|
|
@ -13,7 +13,7 @@ module REXML
|
|||
STOP = '\?>';
|
||||
|
||||
attr_accessor :version, :standalone
|
||||
attr_reader :writeencoding
|
||||
attr_reader :writeencoding, :writethis
|
||||
|
||||
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
|
||||
@writethis = true
|
||||
|
@ -37,9 +37,14 @@ module REXML
|
|||
XMLDecl.new(self)
|
||||
end
|
||||
|
||||
def write writer, indent=-1, transitive=false, ie_hack=false
|
||||
# indent::
|
||||
# Ignored. There must be no whitespace before an XML declaration
|
||||
# transitive::
|
||||
# Ignored
|
||||
# ie_hack::
|
||||
# Ignored
|
||||
def write(writer, indent=-1, transitive=false, ie_hack=false)
|
||||
return nil unless @writethis or writer.kind_of? Output
|
||||
indent( writer, indent )
|
||||
writer << START.sub(/\\/u, '')
|
||||
if writer.kind_of? Output
|
||||
writer << " #{content writer.encoding}"
|
||||
|
|
|
@ -352,7 +352,8 @@ module REXML
|
|||
when :following_sibling
|
||||
#puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}"
|
||||
results = []
|
||||
for node in nodeset
|
||||
nodeset.each do |node|
|
||||
next if node.parent.nil?
|
||||
all_siblings = node.parent.children
|
||||
current_index = all_siblings.index( node )
|
||||
following_siblings = all_siblings[ current_index+1 .. -1 ]
|
||||
|
@ -363,13 +364,14 @@ module REXML
|
|||
|
||||
when :preceding_sibling
|
||||
results = []
|
||||
for node in nodeset
|
||||
nodeset.each do |node|
|
||||
next if node.parent.nil?
|
||||
all_siblings = node.parent.children
|
||||
current_index = all_siblings.index( node )
|
||||
preceding_siblings = all_siblings[ 0 .. current_index-1 ].reverse
|
||||
#results += expr( path_stack.dclone, preceding_siblings )
|
||||
preceding_siblings = all_siblings[ 0, current_index ].reverse
|
||||
results += preceding_siblings
|
||||
end
|
||||
nodeset = preceding_siblings || []
|
||||
nodeset = results
|
||||
node_types = ELEMENTS
|
||||
|
||||
when :preceding
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue