1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Merges upstream changes for REXML v3.1.7

http://www.germane-software.com/repos/rexml/tags/3.1.7

 r1278@bean:  ser | 2007-06-07 00:53:06 -0400
 Fixed a double-encoding bug.  This was a regression, related
 to ticket:48.
 r1292@bean:  ser | 2007-07-25 08:19:36 -0400
  r1279@bean:  ser | 2007-06-09 23:19:02 -0400
  Fixes ticket:89 -- encoding CP-1252 was broken.  ISO-8859-15 had the same
  problem.
  
  Also in this patch is a fix to merge.rb (unused, but it should at least
  contain no errors), and a unit test for ticket:88.
 
 r1293@bean:  ser | 2007-07-25 08:19:37 -0400
  r1281@bean:  ser | 2007-07-24 11:08:48 -0400
  Addresses ticket:85
  
  This is a major rewrite of the XML formatting code.  The XML writers have all
  been extracted out of the classes and put into their own class containers.
  This makes writing parsers easier, and cleaner.
  
  There are three formatters, which correspond to the previous three XML writing
  modes:
  
    REXML::Formatters::Default    
        Prints the XML document exactly as it was parsed
    REXML::Formatters::Pretty     
        Pretty prints the XML document, destroying whitespace in the document
    REXML::Formatters::Transitive 
        Pretty prints the XML document, preserving whitespace
  
  All of the write() functions have been deprecated (some are still used, but
  these will also go away) except the write() function on Document, which is left
  for convenience.  To pretty print an XML document the canonical way:
  
    formatter = REXML::Formatters::Pretty.new( 5 ) # indent by 5 spaces
    formatter.write( document, output )
  
 
 r1294@bean:  ser | 2007-07-25 08:19:38 -0400
  r1283@bean:  ser | 2007-07-24 19:53:30 -0400
  This goes with the previous commit.
 
 r1295@bean:  ser | 2007-07-25 08:19:39 -0400
  r1285@bean:  ser | 2007-07-24 20:02:07 -0400
  And THIS goes with the previous two patches.  Dammit.
 
 r1296@bean:  ser | 2007-07-25 08:19:40 -0400
  r1287@bean:  ser | 2007-07-24 20:12:25 -0400
  Applied patch from Jeff Barczewski.  Note that this changes what the values of
  the name and IDs are from the previous behavior -- the values no longer include
  the quotes.  This is the correct behavior, so I'm leaving it in, but it is not
  backwards compatible.  Also fixes the serializer so that it outputs the doctype
  in a correct format (needed as a result of this change).
 
 r1297@bean:  ser | 2007-07-25 08:38:38 -0400
 Version update


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12844 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ser 2007-07-25 12:47:23 +00:00
parent fead3c93e7
commit 1d8c98a486
23 changed files with 1689 additions and 1328 deletions

View file

@ -18,16 +18,32 @@ module REXML
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
# Constructor. # Constructor.
# FIXME: The parser doesn't catch illegal characters in attributes
#
# first::
# Either: an Attribute, which this new attribute will become a
# clone of; or a String, which is the name of this attribute
# second::
# If +first+ is an Attribute, then this may be an Element, or nil.
# If nil, then the Element parent of this attribute is the parent
# of the +first+ Attribute. If the first argument is a String,
# then this must also be a String, and is the content of the attribute.
# If this is the content, it must be fully normalized (contain no
# illegal characters).
# parent::
# Ignored unless +first+ is a String; otherwise, may be the Element
# parent of this attribute, or nil.
#
# #
# Attribute.new( attribute_to_clone ) # Attribute.new( attribute_to_clone )
# Attribute.new( source ) # Attribute.new( attribute_to_clone, parent_element )
# Attribute.new( "attr", "attr_value" ) # Attribute.new( "attr", "attr_value" )
# Attribute.new( "attr", "attr_value", parent_element ) # Attribute.new( "attr", "attr_value", parent_element )
def initialize( first, second=nil, parent=nil ) def initialize( first, second=nil, parent=nil )
@normalized = @unnormalized = @element = nil @normalized = @unnormalized = @element = nil
if first.kind_of? Attribute if first.kind_of? Attribute
self.name = first.expanded_name self.name = first.expanded_name
@value = first.value @unnormalized = first.value
if second.kind_of? Element if second.kind_of? Element
@element = second @element = second
else else
@ -36,7 +52,7 @@ module REXML
elsif first.kind_of? String elsif first.kind_of? String
@element = parent if parent.kind_of? Element @element = parent if parent.kind_of? Element
self.name = first self.name = first
@value = second.to_s @normalized = second.to_s
else else
raise "illegal argument #{first.class.name} to Attribute constructor" raise "illegal argument #{first.class.name} to Attribute constructor"
end end
@ -72,7 +88,7 @@ module REXML
# Returns true if other is an Attribute and has the same name and value, # Returns true if other is an Attribute and has the same name and value,
# false otherwise. # false otherwise.
def ==( other ) def ==( other )
other.kind_of?(Attribute) and other.name==name and other.value==@value other.kind_of?(Attribute) and other.name==name and other.value==value
end end
# Creates (and returns) a hash from both the name and value # Creates (and returns) a hash from both the name and value
@ -87,7 +103,11 @@ module REXML
# b = Attribute.new( "ns:x", "y" ) # b = Attribute.new( "ns:x", "y" )
# b.to_string # -> "ns:x='y'" # b.to_string # -> "ns:x='y'"
def to_string def to_string
"#@expanded_name='#{to_s().gsub(/'/, ''')}'" if @element and @element.context and @element.context[:attribute_quote] == :quote
%Q^#@expanded_name="#{to_s().gsub(/"/, '&quote;')}"^
else
"#@expanded_name='#{to_s().gsub(/'/, ''')}'"
end
end end
# Returns the attribute value, with entities replaced # Returns the attribute value, with entities replaced
@ -100,8 +120,9 @@ module REXML
doctype = doc.doctype if doc doctype = doc.doctype if doc
end end
@normalized = Text::normalize( @unnormalized, doctype )
@unnormalized = nil @unnormalized = nil
@normalized = Text::normalize( @value, doctype ) @normalized
end end
# Returns the UNNORMALIZED value of this attribute. That is, entities # Returns the UNNORMALIZED value of this attribute. That is, entities
@ -113,8 +134,9 @@ module REXML
doc = @element.document doc = @element.document
doctype = doc.doctype if doc doctype = doc.doctype if doc
end end
@unnormalized = Text::unnormalize( @normalized, doctype )
@normalized = nil @normalized = nil
@unnormalized = Text::unnormalize( @value, doctype ) @unnormalized
end end
# Returns a copy of this attribute # Returns a copy of this attribute

View file

@ -39,31 +39,26 @@ module REXML
@string @string
end end
# == DEPRECATED
# See the rexml/formatters package
#
# Generates XML output of this object # Generates XML output of this object
# #
# output:: # output::
# Where to write the string. Defaults to $stdout # Where to write the string. Defaults to $stdout
# indent:: # indent::
# An integer. If -1, no indenting will be used; otherwise, the # The amount to indent this node by
# indentation will be this number of spaces, and children will be
# indented an additional amount. Defaults to -1.
# transitive:: # transitive::
# If transitive is true and indent is >= 0, then the output will be # Ignored
# pretty-printed in such a way that the added whitespace does not affect
# the absolute *value* of the document -- that is, it leaves the value
# and number of Text nodes in the document unchanged.
# ie_hack:: # ie_hack::
# Internet Explorer is the worst piece of crap to have ever been # Ignored
# written, with the possible exception of Windows itself. Since IE is
# unable to parse proper XML, we have to provide a hack to generate XML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags.
# #
# _Examples_ # _Examples_
# c = CData.new( " Some text " ) # c = CData.new( " Some text " )
# c.write( $stdout ) #-> <![CDATA[ Some text ]]> # c.write( $stdout ) #-> <![CDATA[ Some text ]]>
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
#indent( output, indent ) unless transitive Kernel.warn( "#{self.class.name}.write is deprecated" )
indent( output, indent )
output << START output << START
output << @string output << @string
output << STOP output << STOP

View file

@ -34,6 +34,9 @@ module REXML
Comment.new self Comment.new self
end end
# == DEPRECATED
# See REXML::Formatters
#
# output:: # output::
# Where to write the string # Where to write the string
# indent:: # indent::
@ -45,6 +48,7 @@ module REXML
# ie_hack:: # ie_hack::
# Needed for conformity to the child API, but not used by this class. # Needed for conformity to the child API, but not used by this class.
def write( output, indent=-1, transitive=false, ie_hack=false ) def write( output, indent=-1, transitive=false, ie_hack=false )
Kernel.warn("Comment.write is deprecated. See REXML::Formatters")
indent( output, indent ) indent( output, indent )
output << START output << START
output << @string output << @string

View file

@ -98,38 +98,30 @@ module REXML
# output:: # output::
# Where to write the string # Where to write the string
# indent:: # indent::
# An integer. If -1, no indenting will be used; otherwise, the # An integer. If -1, no indentation will be used; otherwise, the
# indentation will be this number of spaces, and children will be # indentation will be this number of spaces, and children will be
# indented an additional amount. # indented an additional amount.
# transitive:: # transitive::
# If transitive is true and indent is >= 0, then the output will be # Ignored
# pretty-printed in such a way that the added whitespace does not affect
# the absolute *value* of the document -- that is, it leaves the value
# and number of Text nodes in the document unchanged.
# ie_hack:: # ie_hack::
# Internet Explorer is the worst piece of crap to have ever been # Ignored
# written, with the possible exception of Windows itself. Since IE is
# unable to parse proper XML, we have to provide a hack to generate XML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags.
#
def write( output, indent=0, transitive=false, ie_hack=false ) def write( output, indent=0, transitive=false, ie_hack=false )
f = REXML::Formatters::Default.new
indent( output, indent ) indent( output, indent )
output << START output << START
output << ' ' output << ' '
output << @name output << @name
output << " #@external_id" if @external_id output << " #@external_id" if @external_id
output << " #@long_name" if @long_name output << " #{@long_name.inspect}" if @long_name
output << " #@uri" if @uri output << " #{@uri.inspect}" if @uri
unless @children.empty? unless @children.empty?
next_indent = indent + 1 next_indent = indent + 1
output << ' [' output << ' ['
child = nil # speed child = nil # speed
@children.each { |child| @children.each { |child|
output << "\n" output << "\n"
child.write( output, next_indent ) f.write( child, output )
} }
#output << ' '*next_indent
output << "\n]" output << "\n]"
end end
output << STOP output << STOP
@ -219,8 +211,10 @@ module REXML
@string+'>' @string+'>'
end end
# == DEPRECATED
# See REXML::Formatters
#
def write( output, indent ) def write( output, indent )
output << (' '*indent) if indent > 0
output << to_s output << to_s
end end
end end
@ -264,7 +258,6 @@ module REXML
end end
def write( output, indent=-1 ) def write( output, indent=-1 )
output << (' '*indent) if indent > 0
output << to_s output << to_s
end end

View file

@ -31,9 +31,6 @@ module REXML
# to be sources of valid XML documents. # to be sources of valid XML documents.
# @param context if supplied, contains the context of the document; # @param context if supplied, contains the context of the document;
# this should be a Hash. # this should be a Hash.
# NOTE that I'm not sure what the context is for; I cloned it out of
# the Electric XML API (in which it also seems to do nothing), and it
# is now legacy. It may do something, someday... it may disappear.
def initialize( source = nil, context = {} ) def initialize( source = nil, context = {} )
super() super()
@context = context @context = context
@ -142,14 +139,53 @@ module REXML
xml_decl().stand_alone? xml_decl().stand_alone?
end end
# Write the XML tree out, optionally with indent. This writes out the # Write the XML tree out. This writes the entire XML document, including
# entire XML document, including XML declarations, doctype declarations, # declarations and processing instructions.
# and processing instructions (if any are given). #
# A controversial point is whether Document should always write the XML # A controversial point is whether Document should always write the XML
# declaration (<?xml version='1.0'?>) whether or not one is given by the # declaration (<?xml version='1.0'?>) whether or not one is given by the
# user (or source document). REXML does not write one if one was not # user (or source document). REXML does not write one if one was not
# specified, because it adds unneccessary bandwidth to applications such # specified, because it adds unneccessary bandwidth to applications such
# as XML-RPC. # as XML-RPC.
#
# _Examples_
# Document.new("<a><b/></a>").serialize
#
# output_string = ""
# tr = Transitive.new( output_string )
# Document.new("<a><b/></a>").serialize( tr )
#
# formatter::
# One of the rexml/formatters classes. If none is given, then the Pretty
# formatter will be used to dump the XML to the STDOUT.
# Writes this document to $stdout through +formatter+.
#
# formatter::
#   An object responding to <tt>write( node, output )</tt>, normally one of
#   the REXML::Formatters classes.  When nil, a REXML::Formatters::Pretty
#   with its default settings is used.
#
# Returns whatever the formatter's +write+ returns.
#
# No Output wrapper is created here: Formatters::Default#write already
# wraps the output when the document's declared encoding is not UTF-8.
def serialize( formatter = nil )
  formatter = REXML::Formatters::Pretty.new if formatter.nil?
  formatter.write( self, $stdout )
end
# Write the XML tree out, optionally with indent. This writes out the
# entire XML document, including XML declarations, doctype declarations,
# and processing instructions (if any are given).
#
# A controversial point is whether Document should always write the XML
# declaration (<?xml version='1.0'?>) whether or not one is given by the
# user (or source document). REXML does not write one if one was not
# specified, because it adds unnecessary bandwidth to applications such
# as XML-RPC.
#
# See also the classes in the rexml/formatters package for the proper way
# to change the default formatting of XML output
# #
# #
# output:: # output::
@ -160,7 +196,7 @@ module REXML
# indentation will be twice this number of spaces, and children will be # indentation will be twice this number of spaces, and children will be
# indented an additional amount. For a value of 3, every item will be # indented an additional amount. For a value of 3, every item will be
# indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1 # indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
# transitive:: # trans::
# If transitive is true and indent is >= 0, then the output will be # If transitive is true and indent is >= 0, then the output will be
# pretty-printed in such a way that the added whitespace does not affect # pretty-printed in such a way that the added whitespace does not affect
# the absolute *value* of the document -- that is, it leaves the value # the absolute *value* of the document -- that is, it leaves the value
@ -171,14 +207,20 @@ module REXML
# unable to parse proper XML, we have to provide a hack to generate XML # unable to parse proper XML, we have to provide a hack to generate XML
# that IE's limited abilities can handle. This hack inserts a space # that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags. Defaults to false # before the /> on empty tags. Defaults to false
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
@children.each { |node| output = Output.new( output, xml_decl.encoding )
indent( output, indent ) if node.node_type == :element end
if node.write( output, indent, transitive, ie_hack ) formatter = if indent > -1
output << "\n" unless indent<0 or node == @children[-1] if transitive
REXML::Formatters::Transitive.new( indent, ie_hack )
else
REXML::Formatters::Pretty.new( indent, ie_hack )
end
else
REXML::Formatters::Default.new( ie_hack )
end end
} formatter.write( self, output )
end end

File diff suppressed because it is too large Load diff

View file

@ -58,8 +58,8 @@ module REXML
# We have to recognize UTF-16, LSB UTF-16, and UTF-8 # We have to recognize UTF-16, LSB UTF-16, and UTF-8
return UTF_16 if /\A\xfe\xff/n =~ str return UTF_16 if /\A\xfe\xff/n =~ str
return UNILE if /\A\xff\xfe/n =~ str return UNILE if /\A\xff\xfe/n =~ str
str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
return $1.upcase if $1 return $3.upcase if $3
return UTF_8 return UTF_8
end end
end end

View file

@ -3,9 +3,15 @@
# #
module REXML module REXML
module Encoding module Encoding
@@__REXML_encoding_methods = %q~ register( "CP-1252" ) do |o|
class << o
alias encode encode_cp1252
alias decode decode_cp1252
end
end
# Convert from UTF-8 # Convert from UTF-8
def encode content def encode_cp1252(content)
array_utf8 = content.unpack('U*') array_utf8 = content.unpack('U*')
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -54,7 +60,7 @@ module REXML
end end
# Convert to UTF-8 # Convert to UTF-8
def decode(str) def decode_cp1252(str)
array_latin9 = str.unpack('C*') array_latin9 = str.unpack('C*')
array_enc = [] array_enc = []
array_latin9.each do |num| array_latin9.each do |num|
@ -93,6 +99,5 @@ module REXML
end end
array_enc.pack('U*') array_enc.pack('U*')
end end
~
end end
end end

View file

@ -3,9 +3,13 @@
# #
module REXML module REXML
module Encoding module Encoding
@@__REXML_encoding_methods = %q~ register("ISO-8859-15") do |o|
alias encode to_iso_8859_15
alias decode from_iso_8859_15
end
# Convert from UTF-8 # Convert from UTF-8
def to_iso_8859_15 content def to_iso_8859_15(content)
array_utf8 = content.unpack('U*') array_utf8 = content.unpack('U*')
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -64,6 +68,5 @@ module REXML
end end
array_enc.pack('U*') array_enc.pack('U*')
end end
~
end end
end end

View file

@ -89,6 +89,12 @@ module REXML
# Write out a fully formed, correct entity definition (assuming the Entity # Write out a fully formed, correct entity definition (assuming the Entity
# object itself is valid.) # object itself is valid.)
#
# out::
# An object implementing <TT>&lt;&lt;</TT> to which the entity will be
# output
# indent::
# *DEPRECATED* and ignored
def write out, indent=-1 def write out, indent=-1
out << '<!ENTITY ' out << '<!ENTITY '
out << '% ' if @reference out << '% ' if @reference

View file

@ -0,0 +1,109 @@
module REXML
  module Formatters
    # Writes nodes to an output without adding any indentation or line
    # breaks of its own.  Subclasses change the layout by overriding the
    # protected write_* hooks.
    class Default
      # Prints out the XML document with no formatting -- except if ie_hack is
      # set.
      #
      # ie_hack::
      #   If set to true, then inserts whitespace before the close of an empty
      #   tag, so that IE's bad XML parser doesn't choke.
      def initialize( ie_hack=false )
        @ie_hack = ie_hack
      end

      # Writes the node to some output.
      #
      # node::
      #   The node to write
      # output::
      #   A class implementing <TT>&lt;&lt;</TT>.  Pass in an Output object to
      #   change the output encoding.
      #
      # Raises ArgumentError when +node+ is none of the node types handled
      # below.
      def write( node, output )
        case node

        when Document
          # Re-encode on the way out when the document declares a non-UTF-8
          # encoding and the caller did not already supply an Output.
          if node.xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
            output = Output.new( output, node.xml_decl.encoding )
          end
          write_document( node, output )

        when Element
          write_element( node, output )

        when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
             Attribute, AttlistDecl
          # These node types serialize themselves; -1 is passed as their
          # indent argument.
          node.write( output,-1 )

        when Instruction
          write_instruction( node, output )

        when DocType, XMLDecl
          node.write( output )

        when Comment
          write_comment( node, output )

        when CData
          write_cdata( node, output )

        when Text
          write_text( node, output )

        else
          # A StandardError subclass, so an ordinary +rescue+ can handle it;
          # the message names the offending class to make the failure
          # diagnosable.
          raise ArgumentError,
                "XML FORMATTING ERROR: cannot format a node of type #{node.class}"

        end
      end

      protected

      # Writes every child of the document, in order.
      def write_document( node, output )
        node.children.each { |child| write( child, output ) }
      end

      # Writes the start tag with its attributes, then either closes the tag
      # as empty or writes the children followed by the end tag.
      def write_element( node, output )
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          node.children.each { |child|
            write( child, output )
          }
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes the text node's to_s value unchanged.
      def write_text( node, output )
        output << node.to_s()
      end

      # Writes the comment's to_s value between Comment::START and
      # Comment::STOP.
      def write_comment( node, output )
        output << Comment::START
        output << node.to_s
        output << Comment::STOP
      end

      # Writes the CDATA section's to_s value between CData::START and
      # CData::STOP.
      def write_cdata( node, output )
        output << CData::START
        output << node.to_s
        output << CData::STOP
      end

      # Writes a processing instruction as START, target, a space, content,
      # STOP.  The first backslash is removed from each delimiter constant
      # before it is written.
      def write_instruction( node, output )
        output << Instruction::START.sub(/\\/u, '')
        output << node.target
        output << ' '
        output << node.content
        output << Instruction::STOP.sub(/\\/u, '')
      end
    end
  end
end

View file

@ -0,0 +1,134 @@
require 'rexml/formatters/default'
module REXML
  module Formatters
    # Pretty-prints an XML document.  This destroys whitespace in text nodes
    # and will insert carriage returns and indentations.
    #
    # TODO: Add an option to print attributes on new lines
    class Pretty < Default

      # If compact is set to true, then the formatter will attempt to use as
      # little space as possible
      attr_accessor :compact
      # The width of a page.  Used for formatting text
      attr_accessor :width

      # Create a new pretty printer.
      #
      # indentation::
      #   An integer greater than 0.  The indentation of each level will be
      #   this number of spaces.  If this is < 1, the behavior of this object
      #   is undefined.  Defaults to 2.
      # ie_hack::
      #   If true, the printer will insert whitespace before closing empty
      #   tags, thereby allowing Internet Explorer's feeble XML parser to
      #   function. Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
        @width = 80
        @compact = false
      end

      protected

      # Writes the element indented by the current level, with each
      # non-blank child on its own line one level deeper.
      def write_element(node, output)
        output << ' '*@level
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          unless write_compact_children( node, output )
            output << "\n"
            @level += @indentation
            node.children.each { |child|
              # Whitespace-only text nodes are dropped.
              next if child.kind_of?(Text) and child.to_s.strip.length == 0
              write( child, output )
              output << "\n"
            }
            @level -= @indentation
            output << ' '*@level
          end
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Collapses whitespace in the text, wraps it to the page width that is
      # left after indentation, and writes it indented by the current level.
      def write_text( node, output )
        # Non-destructive calls: the string returned by the node is never
        # modified in place.
        s = node.to_s.gsub(/\s/, ' ').squeeze(" ")
        s = wrap(s, @width - @level)
        s = indent_text(s, @level, " ", true)
        output << (' '*@level + s)
      end

      # Writes the comment indented by the current level.
      def write_comment( node, output)
        output << ' ' * @level
        super
      end

      # Writes the CDATA section indented by the current level.
      def write_cdata( node, output)
        output << ' ' * @level
        super
      end

      # Writes the document's children, one per line.
      def write_document( node, output )
        # Ok, this is a bit odd.  All XML documents have an XML declaration,
        # but it may not write itself if the user didn't specifically add it,
        # either through the API or in the input document.  If it doesn't write
        # itself, then we don't need a carriage return... which makes this
        # logic more complex.
        kids = node.children
        final = kids.length - 1
        first = kids[0]
        # True only when the first child answers +writethis+ with a false
        # value; first children without that method are treated as written.
        silent_first = first.respond_to?(:writethis) && !first.writethis

        # Children are identified by position, so two distinct nodes that
        # merely compare equal are not mistaken for the first or last child.
        kids.each_with_index { |child, position|
          next if position == final and child.instance_of?(Text)
          unless position == 0 or child.instance_of?(Text) or
            (position == 1 and silent_first)
            output << "\n"
          end
          write( child, output )
        }
      end

      private

      # Tries to write all children of +node+ on the current line.  This only
      # applies when +compact+ is set, every child is a Text node, and the
      # rendered text plus the current indentation is shorter than +width+.
      # Returns true if the children were written, false if the caller must
      # write them itself.
      def write_compact_children( node, output )
        return false unless compact
        return false unless node.children.all? { |child| child.kind_of?(Text) }

        line = "".dup
        outer_level = @level
        @level = 0 # inline text carries no indentation of its own
        begin
          node.children.each { |child| write( child, line ) }
        ensure
          @level = outer_level
        end

        return false unless line.length + @level < @width
        output << line
        true
      end

      # Inserts +level+ copies of +style+ after every newline in +string+.
      # A negative +level+ returns the string unchanged.  +indentfirstline+
      # is not consulted.
      def indent_text(string, level=1, style="\t", indentfirstline=true)
        return string if level < 0
        string.gsub(/\n/, "\n#{style*level}")
      end

      # Breaks +string+ into lines, splitting at the last space found at or
      # before index +width+.  Text with no such space is left unbroken, and
      # a +width+ below 1 disables wrapping.
      def wrap(string, width)
        return string if width < 1
        lines = []
        while string.length > width and (place = string.rindex(' ', width))
          lines << string[0, place]
          string = string[place+1..-1]
        end
        lines << string
        lines.join("\n")
      end

    end
  end
end

View file

@ -0,0 +1,56 @@
require 'rexml/formatters/pretty'
module REXML
  module Formatters
    # The Transitive formatter writes an XML document that parses to an
    # identical document as the source document.  This means that no extra
    # whitespace nodes are inserted, and whitespace within text nodes is
    # preserved.  Within these constraints, the document is pretty-printed,
    # with whitespace inserted into the metadata to introduce formatting.
    #
    # Note that this is only useful if the original XML is not already
    # formatted.  Since this formatter does not alter whitespace nodes, the
    # results of formatting already formatted XML will be odd.
    class Transitive < Default
      # indentation::
      #   The number of spaces added per nesting level.  Defaults to 2.
      # ie_hack::
      #   Accepted so that this class can be constructed with the same
      #   arguments as the other formatters.  It is stored but needs no
      #   special handling here: write_element always puts a line break
      #   before the close of a tag, so an empty tag never ends in a bare
      #   "/>".  Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
      end

      protected

      # Writes the element with all added whitespace placed inside the tags
      # (before the closing ">" or "/>"), never between nodes.
      def write_element( node, output )
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        # The line break and indentation go inside the start tag.
        output << "\n"
        output << ' '*@level
        if node.children.empty?
          output << "/"
        else
          output << ">"
          @level += @indentation
          node.children.each { |child|
            write( child, output )
          }
          @level -= @indentation
          output << "</#{node.expanded_name}"
          # Likewise, the end tag's formatting goes before its ">".
          output << "\n"
          output << ' '*@level
        end
        output << ">"
      end

      # Writes the text node's to_s value unchanged.
      def write_text( node, output )
        output << node.to_s()
      end
    end
  end
end

View file

@ -38,7 +38,11 @@ module REXML
Instruction.new self Instruction.new self
end end
# == DEPRECATED
# See the rexml/formatters package
#
def write writer, indent=-1, transitive=false, ie_hack=false def write writer, indent=-1, transitive=false, ie_hack=false
Kernel.warn( "#{self.class.name}.write is deprecated" )
indent(writer, indent) indent(writer, indent)
writer << START.sub(/\\/u, '') writer << START.sub(/\\/u, '')
writer << @target writer << @target

View file

@ -18,10 +18,19 @@ module REXML
@parent[ ind - 1 ] @parent[ ind - 1 ]
end end
def to_s indent=-1 # indent::
rv = "" # *DEPRECATED* This parameter is now ignored. See the formatters in the
write rv,indent # REXML::Formatters package for changing the output style.
rv def to_s indent=nil
unless indent.nil?
Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
f = REXML::Formatters::Pretty.new( indent )
f.write( self, rv, indent )
else
f = REXML::Formatters::Default.new
f.write( self, rv = "" )
end
return rv
end end
def indent to, ind def indent to, ind

View file

@ -53,7 +53,7 @@ module REXML
STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
ENTITY_START = /^\s*<!ENTITY/ ENTITY_START = /^\s*<!ENTITY/
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
ELEMENTDECL_START = /^\s*<!ELEMENT/um ELEMENTDECL_START = /^\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
@ -217,10 +217,10 @@ module REXML
close = md[2] close = md[2]
identity =~ IDENTITY identity =~ IDENTITY
name = $1 name = $1
raise REXML::ParseException("DOCTYPE is missing a name") if name.nil? raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
pub_sys = $2.nil? ? nil : $2.strip pub_sys = $2.nil? ? nil : $2.strip
long_name = $3.nil? ? nil : $3.strip long_name = $4.nil? ? nil : $4.strip
uri = $4.nil? ? nil : $4.strip uri = $6.nil? ? nil : $6.strip
args = [ :start_doctype, name, pub_sys, long_name, uri ] args = [ :start_doctype, name, pub_sys, long_name, uri ]
if close == ">" if close == ">"
@document_status = :after_doctype @document_status = :after_doctype

View file

@ -94,6 +94,8 @@ module REXML
when :end_document when :end_document
handle( :end_document ) handle( :end_document )
break break
when :start_doctype
handle( :doctype, *event[1..-1])
when :end_doctype when :end_doctype
context = context[1] context = context[1]
when :start_element when :start_element
@ -167,7 +169,7 @@ module REXML
when :entitydecl when :entitydecl
@entities[ event[1] ] = event[2] if event.size == 3 @entities[ event[1] ] = event[2] if event.size == 3
handle( *event ) handle( *event )
when :processing_instruction, :comment, :doctype, :attlistdecl, when :processing_instruction, :comment, :attlistdecl,
:elementdecl, :cdata, :notationdecl, :xmldecl :elementdecl, :cdata, :notationdecl, :xmldecl
handle( *event ) handle( *event )
end end

View file

@ -551,7 +551,7 @@ module REXML
end end
end end
#puts "BEFORE WITH '#{rest}'" #puts "BEFORE WITH '#{rest}'"
rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/ rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
parsed.concat(n) parsed.concat(n)
return rest return rest
end end

View file

@ -10,8 +10,8 @@
# #
# Main page:: http://www.germane-software.com/software/rexml # Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom> # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
# Version:: 3.1.5 # Version:: 3.1.7
# Date:: 2006/250 # Date:: 2007/206
# #
# This API documentation can be downloaded from the REXML home page, or can # This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc] # be accessed online[http://www.germane-software.com/software/rexml_doc]
@ -20,9 +20,10 @@
# or can be accessed # or can be accessed
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html] # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML module REXML
COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>" COPYRIGHT = "Copyright © 2001-2007 Sean Russell <ser@germane-software.com>"
DATE = "2006/250" DATE = "2007/206"
VERSION = "3.1.5" VERSION = "3.1.7"
REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip
Copyright = COPYRIGHT Copyright = COPYRIGHT
Version = VERSION Version = VERSION

View file

@ -1,139 +1,139 @@
require 'rexml/encoding' require 'rexml/encoding'
module REXML module REXML
# Generates Source-s. USE THIS CLASS. # Generates Source-s. USE THIS CLASS.
class SourceFactory class SourceFactory
# Generates a Source object # Generates a Source object
# @param arg Either a String, or an IO # @param arg Either a String, or an IO
# @return a Source, or nil if a bad argument was given # @return a Source, or nil if a bad argument was given
def SourceFactory::create_from(arg) def SourceFactory::create_from(arg)
if arg.kind_of? String if arg.kind_of? String
Source.new(arg) Source.new(arg)
elsif arg.respond_to? :read and elsif arg.respond_to? :read and
arg.respond_to? :readline and arg.respond_to? :readline and
arg.respond_to? :nil? and arg.respond_to? :nil? and
arg.respond_to? :eof? arg.respond_to? :eof?
IOSource.new(arg) IOSource.new(arg)
elsif arg.kind_of? Source elsif arg.kind_of? Source
arg arg
else else
raise "#{source.class} is not a valid input stream. It must walk \n"+ raise "#{source.class} is not a valid input stream. It must walk \n"+
"like either a String, IO, or Source." "like either a String, IO, or Source."
end end
end end
end end
# A Source can be searched for patterns, and wraps buffers and other # A Source can be searched for patterns, and wraps buffers and other
# objects and provides consumption of text # objects and provides consumption of text
class Source class Source
include Encoding include Encoding
# The current buffer (what we're going to read next) # The current buffer (what we're going to read next)
attr_reader :buffer attr_reader :buffer
# The line number of the last consumed text # The line number of the last consumed text
attr_reader :line attr_reader :line
attr_reader :encoding attr_reader :encoding
# Constructor # Constructor
# @param arg must be a String, and should be a valid XML document # @param arg must be a String, and should be a valid XML document
# @param encoding if non-null, sets the encoding of the source to this # @param encoding if non-null, sets the encoding of the source to this
# value, overriding all encoding detection # value, overriding all encoding detection
def initialize(arg, encoding=nil) def initialize(arg, encoding=nil)
@orig = @buffer = arg @orig = @buffer = arg
if encoding if encoding
self.encoding = encoding self.encoding = encoding
else else
self.encoding = check_encoding( @buffer ) self.encoding = check_encoding( @buffer )
end end
@line = 0 @line = 0
end end
# Inherited from Encoding # Inherited from Encoding
# Overridden to support optimized en/decoding # Overridden to support optimized en/decoding
def encoding=(enc) def encoding=(enc)
return unless super return unless super
@line_break = encode( '>' ) @line_break = encode( '>' )
if enc != UTF_8 if enc != UTF_8
@buffer = decode(@buffer) @buffer = decode(@buffer)
@to_utf = true @to_utf = true
else else
@to_utf = false @to_utf = false
end end
end end
# Scans the source for a given pattern. Note, that this is not your # Scans the source for a given pattern. Note, that this is not your
# usual scan() method. For one thing, the pattern argument has some # usual scan() method. For one thing, the pattern argument has some
# requirements; for another, the source can be consumed. You can easily # requirements; for another, the source can be consumed. You can easily
# confuse this method. Originally, the patterns were easier # confuse this method. Originally, the patterns were easier
# to construct and this method more robust, because this method # to construct and this method more robust, because this method
# generated search regexes on the fly; however, this was # generated search regexes on the fly; however, this was
# computationally expensive and slowed down the entire REXML package # computationally expensive and slowed down the entire REXML package
# considerably, since this is by far the most commonly called method. # considerably, since this is by far the most commonly called method.
# @param pattern must be a Regexp, and must be in the form of # @param pattern must be a Regexp, and must be in the form of
# /^\s*(#{your pattern, with no groups})(.*)/. The first group # /^\s*(#{your pattern, with no groups})(.*)/. The first group
# will be returned; the second group is used if the consume flag is # will be returned; the second group is used if the consume flag is
# set. # set.
# @param consume if true, the pattern returned will be consumed, leaving # @param consume if true, the pattern returned will be consumed, leaving
# everything after it in the Source. # everything after it in the Source.
# @return the pattern, if found, or nil if the Source is empty or the # @return the pattern, if found, or nil if the Source is empty or the
# pattern is not found. # pattern is not found.
def scan(pattern, cons=false) def scan(pattern, cons=false)
return nil if @buffer.nil? return nil if @buffer.nil?
rv = @buffer.scan(pattern) rv = @buffer.scan(pattern)
@buffer = $' if cons and rv.size>0 @buffer = $' if cons and rv.size>0
rv rv
end end
def read def read
end end
def consume( pattern ) def consume( pattern )
@buffer = $' if pattern.match( @buffer ) @buffer = $' if pattern.match( @buffer )
end end
def match_to( char, pattern ) def match_to( char, pattern )
return pattern.match(@buffer) return pattern.match(@buffer)
end end
def match_to_consume( char, pattern ) def match_to_consume( char, pattern )
md = pattern.match(@buffer) md = pattern.match(@buffer)
@buffer = $' @buffer = $'
return md return md
end end
def match(pattern, cons=false) def match(pattern, cons=false)
md = pattern.match(@buffer) md = pattern.match(@buffer)
@buffer = $' if cons and md @buffer = $' if cons and md
return md return md
end end
# @return true if the Source is exhausted # @return true if the Source is exhausted
def empty? def empty?
@buffer == "" @buffer == ""
end end
def position def position
@orig.index( @buffer ) @orig.index( @buffer )
end end
# @return the current line in the source # @return the current line in the source
def current_line def current_line
lines = @orig.split lines = @orig.split
res = lines.grep @buffer[0..30] res = lines.grep @buffer[0..30]
res = res[-1] if res.kind_of? Array res = res[-1] if res.kind_of? Array
lines.index( res ) if res lines.index( res ) if res
end end
end end
# A Source that wraps an IO. See the Source class for method # A Source that wraps an IO. See the Source class for method
# documentation # documentation
class IOSource < Source class IOSource < Source
#attr_reader :block_size #attr_reader :block_size
# block_size has been deprecated # block_size has been deprecated
def initialize(arg, block_size=500, encoding=nil) def initialize(arg, block_size=500, encoding=nil)
@er_source = @source = arg @er_source = @source = arg
@to_utf = false @to_utf = false
# Determining the encoding is a deceptively difficult issue to resolve. # Determining the encoding is a deceptively difficult issue to resolve.
# First, we check the first two bytes for UTF-16. Then we # First, we check the first two bytes for UTF-16. Then we
# assume that the encoding is at least ASCII enough for the '>', and # assume that the encoding is at least ASCII enough for the '>', and
@ -147,86 +147,89 @@ module REXML
self.encoding = encoding self.encoding = encoding
elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
self.encoding = check_encoding( str ) self.encoding = check_encoding( str )
elsif (0xef == str[0] && 0xbb == str[1])
str += @source.read(1)
str = '' if (0xbf == str[2])
else else
@line_break = '>' @line_break = '>'
end end
super str+@source.readline( @line_break ) super str+@source.readline( @line_break )
end end
def scan(pattern, cons=false) def scan(pattern, cons=false)
rv = super rv = super
# You'll notice that this next section is very similar to the same # You'll notice that this next section is very similar to the same
# section in match(), but just a liiittle different. This is # section in match(), but just a liiittle different. This is
# because it is a touch faster to do it this way with scan() # because it is a touch faster to do it this way with scan()
# than the way match() does it; enough faster to warrant duplicating # than the way match() does it; enough faster to warrant duplicating
# some code # some code
if rv.size == 0 if rv.size == 0
until @buffer =~ pattern or @source.nil? until @buffer =~ pattern or @source.nil?
begin begin
# READLINE OPT # READLINE OPT
#str = @source.read(@block_size) #str = @source.read(@block_size)
str = @source.readline(@line_break) str = @source.readline(@line_break)
str = decode(str) if @to_utf and str str = decode(str) if @to_utf and str
@buffer << str @buffer << str
rescue Iconv::IllegalSequence rescue Iconv::IllegalSequence
raise raise
rescue rescue
@source = nil @source = nil
end end
end end
rv = super rv = super
end end
rv.taint rv.taint
rv rv
end end
def read def read
begin begin
str = @source.readline(@line_break) str = @source.readline(@line_break)
str = decode(str) if @to_utf and str str = decode(str) if @to_utf and str
@buffer << str @buffer << str
rescue Exception, NameError rescue Exception, NameError
@source = nil @source = nil
end end
end end
def consume( pattern ) def consume( pattern )
match( pattern, true ) match( pattern, true )
end end
def match( pattern, cons=false ) def match( pattern, cons=false )
rv = pattern.match(@buffer) rv = pattern.match(@buffer)
@buffer = $' if cons and rv @buffer = $' if cons and rv
while !rv and @source while !rv and @source
begin begin
str = @source.readline(@line_break) str = @source.readline(@line_break)
str = decode(str) if @to_utf and str str = decode(str) if @to_utf and str
@buffer << str @buffer << str
rv = pattern.match(@buffer) rv = pattern.match(@buffer)
@buffer = $' if cons and rv @buffer = $' if cons and rv
rescue rescue
@source = nil @source = nil
end end
end end
rv.taint rv.taint
rv rv
end end
def empty? def empty?
super and ( @source.nil? || @source.eof? ) super and ( @source.nil? || @source.eof? )
end end
def position def position
@er_source.stat.pipe? ? 0 : @er_source.pos @er_source.stat.pipe? ? 0 : @er_source.pos
end end
# @return the current line in the source # @return the current line in the source
def current_line def current_line
begin begin
pos = @er_source.pos # The byte position in the source pos = @er_source.pos # The byte position in the source
lineno = @er_source.lineno # The XML < position in the source lineno = @er_source.lineno # The XML < position in the source
@er_source.rewind @er_source.rewind
line = 0 # The \r\n position in the source line = 0 # The \r\n position in the source
begin begin
while @er_source.pos < pos while @er_source.pos < pos
@er_source.readline @er_source.readline
@ -238,7 +241,7 @@ module REXML
pos = -1 pos = -1
line = -1 line = -1
end end
[pos, lineno, line] [pos, lineno, line]
end end
end end
end end

View file

@ -211,16 +211,17 @@ module REXML
return new_string return new_string
end end
# == DEPRECATED
# See REXML::Formatters
#
def write( writer, indent=-1, transitive=false, ie_hack=false ) def write( writer, indent=-1, transitive=false, ie_hack=false )
s = to_s() Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
if not (@parent and @parent.whitespace) then formatter = if indent > -1
s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all REXML::Formatters::Pretty.new( indent )
if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0 else
s = indent_text(s, indent, @parent.context[:indentstyle], false) REXML::Formatters::Default.new
end end
s.squeeze!(" \n\t") if @parent and !@parent.whitespace formatter.write( self, writer )
end
writer << s
end end
# FIXME # FIXME

View file

@ -13,7 +13,7 @@ module REXML
STOP = '\?>'; STOP = '\?>';
attr_accessor :version, :standalone attr_accessor :version, :standalone
attr_reader :writeencoding attr_reader :writeencoding, :writethis
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil) def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
@writethis = true @writethis = true
@ -37,9 +37,14 @@ module REXML
XMLDecl.new(self) XMLDecl.new(self)
end end
def write writer, indent=-1, transitive=false, ie_hack=false # indent::
# Ignored. There must be no whitespace before an XML declaration
# transitive::
# Ignored
# ie_hack::
# Ignored
def write(writer, indent=-1, transitive=false, ie_hack=false)
return nil unless @writethis or writer.kind_of? Output return nil unless @writethis or writer.kind_of? Output
indent( writer, indent )
writer << START.sub(/\\/u, '') writer << START.sub(/\\/u, '')
if writer.kind_of? Output if writer.kind_of? Output
writer << " #{content writer.encoding}" writer << " #{content writer.encoding}"

View file

@ -352,7 +352,8 @@ module REXML
when :following_sibling when :following_sibling
#puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}" #puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}"
results = [] results = []
for node in nodeset nodeset.each do |node|
next if node.parent.nil?
all_siblings = node.parent.children all_siblings = node.parent.children
current_index = all_siblings.index( node ) current_index = all_siblings.index( node )
following_siblings = all_siblings[ current_index+1 .. -1 ] following_siblings = all_siblings[ current_index+1 .. -1 ]
@ -363,13 +364,14 @@ module REXML
when :preceding_sibling when :preceding_sibling
results = [] results = []
for node in nodeset nodeset.each do |node|
next if node.parent.nil?
all_siblings = node.parent.children all_siblings = node.parent.children
current_index = all_siblings.index( node ) current_index = all_siblings.index( node )
preceding_siblings = all_siblings[ 0 .. current_index-1 ].reverse preceding_siblings = all_siblings[ 0, current_index ].reverse
#results += expr( path_stack.dclone, preceding_siblings ) results += preceding_siblings
end end
nodeset = preceding_siblings || [] nodeset = results
node_types = ELEMENTS node_types = ELEMENTS
when :preceding when :preceding