1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Merges upstream changes for REXML v3.1.7

http://www.germane-software.com/repos/rexml/tags/3.1.7

 r1278@bean:  ser | 2007-06-07 00:53:06 -0400
 Fixed a double-encoding bug.  This was a regression, related
 to ticket:48.
 r1292@bean:  ser | 2007-07-25 08:19:36 -0400
  r1279@bean:  ser | 2007-06-09 23:19:02 -0400
  Fixes ticket:89 -- encoding CP-1252 was broken.  ISO-8859-15 had the same
  problem.
  
  Also in this patch is a fix to merge.rb (unused, but it should at least
  contain no errors), and a unit test for ticket:88.
 
 r1293@bean:  ser | 2007-07-25 08:19:37 -0400
  r1281@bean:  ser | 2007-07-24 11:08:48 -0400
  Addresses ticket:85
  
  This is a major rewrite of the XML formatting code.  The XML writers have all
  been extracted out of the classes and put into their own class containers.
  This makes writing parsers easier, and cleaner.
  
  There are three formatters, which correspond to the previous three XML writing
  modes:
  
    REXML::Formatters::Default    
        Prints the XML document exactly as it was parsed
    REXML::Formatters::Pretty     
        Pretty prints the XML document, destroying whitespace in the document
    REXML::Formatters::Transitive 
        Pretty prints the XML document, preserving whitespace
  
  All of the write() functions have been deprecated (some are still used, but
  these will also go away) except the write() function on Document, which is left
  for convenience.  To pretty print an XML document the canonical way:
  
    formatter = REXML::Formatters::Pretty.new( 5 ) # indent by 5 spaces
    formatter.write( document, output )
  
 
 r1294@bean:  ser | 2007-07-25 08:19:38 -0400
  r1283@bean:  ser | 2007-07-24 19:53:30 -0400
  This goes with the previous commit.
 
 r1295@bean:  ser | 2007-07-25 08:19:39 -0400
  r1285@bean:  ser | 2007-07-24 20:02:07 -0400
  And THIS goes with the previous two patches.  Dammit.
 
 r1296@bean:  ser | 2007-07-25 08:19:40 -0400
  r1287@bean:  ser | 2007-07-24 20:12:25 -0400
  Applied patch from Jeff Barczewski.  Note that this changes what the values of
  the name and IDs are from the previous behavior -- the values no longer include
  the quotes.  This is the correct behavior, so I'm leaving it in, but it is not
  backwards compatible.  Also fixes the serializer so that it outputs the doctype
  in a correct format (needed as a result of this change).
 
 r1297@bean:  ser | 2007-07-25 08:38:38 -0400
 Version update


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12844 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ser 2007-07-25 12:47:23 +00:00
parent fead3c93e7
commit 1d8c98a486
23 changed files with 1689 additions and 1328 deletions

View file

@ -18,16 +18,32 @@ module REXML
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
# Constructor.
# FIXME: The parser doesn't catch illegal characters in attributes
#
# first::
# Either: an Attribute, which this new attribute will become a
# clone of; or a String, which is the name of this attribute
# second::
# If +first+ is an Attribute, then this may be an Element, or nil.
# If nil, then the Element parent of this attribute is the parent
# of the +first+ Attribute. If the first argument is a String,
# then this must also be a String, and is the content of the attribute.
# If this is the content, it must be fully normalized (contain no
# illegal characters).
# parent::
# Ignored unless +first+ is a String; otherwise, may be the Element
# parent of this attribute, or nil.
#
#
# Attribute.new( attribute_to_clone )
# Attribute.new( source )
# Attribute.new( attribute_to_clone, parent_element )
# Attribute.new( "attr", "attr_value" )
# Attribute.new( "attr", "attr_value", parent_element )
def initialize( first, second=nil, parent=nil )
@normalized = @unnormalized = @element = nil
if first.kind_of? Attribute
self.name = first.expanded_name
@value = first.value
@unnormalized = first.value
if second.kind_of? Element
@element = second
else
@ -36,7 +52,7 @@ module REXML
elsif first.kind_of? String
@element = parent if parent.kind_of? Element
self.name = first
@value = second.to_s
@normalized = second.to_s
else
raise "illegal argument #{first.class.name} to Attribute constructor"
end
@ -72,7 +88,7 @@ module REXML
# Returns true if other is an Attribute and has the same name and value,
# false otherwise.
def ==( other )
other.kind_of?(Attribute) and other.name==name and other.value==@value
other.kind_of?(Attribute) and other.name==name and other.value==value
end
# Creates (and returns) a hash from both the name and value
@ -87,8 +103,12 @@ module REXML
# b = Attribute.new( "ns:x", "y" )
# b.to_string # -> "ns:x='y'"
def to_string
if @element and @element.context and @element.context[:attribute_quote] == :quote
%Q^#@expanded_name="#{to_s().gsub(/"/, '&quote;')}"^
else
"#@expanded_name='#{to_s().gsub(/'/, ''')}'"
end
end
# Returns the attribute value, with entities replaced
def to_s
@ -100,8 +120,9 @@ module REXML
doctype = doc.doctype if doc
end
@normalized = Text::normalize( @unnormalized, doctype )
@unnormalized = nil
@normalized = Text::normalize( @value, doctype )
@normalized
end
# Returns the UNNORMALIZED value of this attribute. That is, entities
@ -113,8 +134,9 @@ module REXML
doc = @element.document
doctype = doc.doctype if doc
end
@unnormalized = Text::unnormalize( @normalized, doctype )
@normalized = nil
@unnormalized = Text::unnormalize( @value, doctype )
@unnormalized
end
# Returns a copy of this attribute

View file

@ -39,31 +39,26 @@ module REXML
@string
end
# == DEPRECATED
# See the rexml/formatters package
#
# Generates XML output of this object
#
# output::
# Where to write the string. Defaults to $stdout
# indent::
# An integer. If -1, no indenting will be used; otherwise, the
# indentation will be this number of spaces, and children will be
# indented an additional amount. Defaults to -1.
# The amount to indent this node by
# transitive::
# If transitive is true and indent is >= 0, then the output will be
# pretty-printed in such a way that the added whitespace does not affect
# the absolute *value* of the document -- that is, it leaves the value
# and number of Text nodes in the document unchanged.
# Ignored
# ie_hack::
# Internet Explorer is the worst piece of crap to have ever been
# written, with the possible exception of Windows itself. Since IE is
# unable to parse proper XML, we have to provide a hack to generate XML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags.
# Ignored
#
# _Examples_
# c = CData.new( " Some text " )
# c.write( $stdout ) #-> <![CDATA[ Some text ]]>
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
#indent( output, indent ) unless transitive
Kernel.warn( "#{self.class.name}.write is deprecated" )
indent( output, indent )
output << START
output << @string
output << STOP

View file

@ -34,6 +34,9 @@ module REXML
Comment.new self
end
# == DEPRECATED
# See REXML::Formatters
#
# output::
# Where to write the string
# indent::
@ -45,6 +48,7 @@ module REXML
# ie_hack::
# Needed for conformity to the child API, but not used by this class.
def write( output, indent=-1, transitive=false, ie_hack=false )
Kernel.warn("Comment.write is deprecated. See REXML::Formatters")
indent( output, indent )
output << START
output << @string

View file

@ -98,38 +98,30 @@ module REXML
# output::
# Where to write the string
# indent::
# An integer. If -1, no indenting will be used; otherwise, the
# An integer. If -1, no indentation will be used; otherwise, the
# indentation will be this number of spaces, and children will be
# indented an additional amount.
# transitive::
# If transitive is true and indent is >= 0, then the output will be
# pretty-printed in such a way that the added whitespace does not affect
# the absolute *value* of the document -- that is, it leaves the value
# and number of Text nodes in the document unchanged.
# Ignored
# ie_hack::
# Internet Explorer is the worst piece of crap to have ever been
# written, with the possible exception of Windows itself. Since IE is
# unable to parse proper XML, we have to provide a hack to generate XML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags.
#
# Ignored
def write( output, indent=0, transitive=false, ie_hack=false )
f = REXML::Formatters::Default.new
indent( output, indent )
output << START
output << ' '
output << @name
output << " #@external_id" if @external_id
output << " #@long_name" if @long_name
output << " #@uri" if @uri
output << " #{@long_name.inspect}" if @long_name
output << " #{@uri.inspect}" if @uri
unless @children.empty?
next_indent = indent + 1
output << ' ['
child = nil # speed
@children.each { |child|
output << "\n"
child.write( output, next_indent )
f.write( child, output )
}
#output << ' '*next_indent
output << "\n]"
end
output << STOP
@ -219,8 +211,10 @@ module REXML
@string+'>'
end
# == DEPRECATED
# See REXML::Formatters
#
def write( output, indent )
output << (' '*indent) if indent > 0
output << to_s
end
end
@ -264,7 +258,6 @@ module REXML
end
def write( output, indent=-1 )
output << (' '*indent) if indent > 0
output << to_s
end

View file

@ -31,9 +31,6 @@ module REXML
# to be sources of valid XML documents.
# @param context if supplied, contains the context of the document;
# this should be a Hash.
# NOTE that I'm not sure what the context is for; I cloned it out of
# the Electric XML API (in which it also seems to do nothing), and it
# is now legacy. It may do something, someday... it may disappear.
def initialize( source = nil, context = {} )
super()
@context = context
@ -142,15 +139,54 @@ module REXML
xml_decl().stand_alone?
end
# Write the XML tree out, optionally with indent. This writes out the
# entire XML document, including XML declarations, doctype declarations,
# and processing instructions (if any are given).
# Write the XML tree out. This writes the entire XML document, including
# declarations and processing instructions.
#
# A controversial point is whether Document should always write the XML
# declaration (<?xml version='1.0'?>) whether or not one is given by the
# user (or source document). REXML does not write one if one was not
# specified, because it adds unneccessary bandwidth to applications such
# as XML-RPC.
#
# _Examples_
# Document.new("<a><b/></a>").serialize
#
# output_string = ""
# tr = Transitive.new( output_string )
# Document.new("<a><b/></a>").serialize( tr )
#
# formatter::
# One of the rexml/formatters classes. If none is given, then the Pretty
# formatter will be used to dump the XML to the STDOUT.
def serialize( formatter = nil )
if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
output = Output.new( output, xml_decl.encoding )
end
formatter = REXML::Pretty.new( $stdout ) if (formatter.nil?)
@children.each { |node|
puts "node = #{node.inspect}"
indent( output, indent ) if node.node_type == :element
if node.write( output, indent, transitive, ie_hack )
output << "\n" unless indent<0 or node == @children[-1]
end
}
end
# Write the XML tree out, optionally with indent. This writes out the
# entire XML document, including XML declarations, doctype declarations,
# and processing instructions (if any are given).
#
# A controversial point is whether Document should always write the XML
# declaration (<?xml version='1.0'?>) whether or not one is given by the
# user (or source document). REXML does not write one if one was not
# specified, because it adds unneccessary bandwidth to applications such
# as XML-RPC.
#
# See also the classes in the rexml/formatters package for the proper way
# to change the default formatting of XML output
#
#
# output::
# output an object which supports '<< string'; this is where the
@ -160,7 +196,7 @@ module REXML
# indentation will be twice this number of spaces, and children will be
# indented an additional amount. For a value of 3, every item will be
# indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
# transitive::
# trans::
# If transitive is true and indent is >= 0, then the output will be
# pretty-printed in such a way that the added whitespace does not affect
# the absolute *value* of the document -- that is, it leaves the value
@ -171,14 +207,20 @@ module REXML
# unable to parse proper XML, we have to provide a hack to generate XML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags. Defaults to false
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
@children.each { |node|
indent( output, indent ) if node.node_type == :element
if node.write( output, indent, transitive, ie_hack )
output << "\n" unless indent<0 or node == @children[-1]
def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
output = Output.new( output, xml_decl.encoding )
end
}
formatter = if indent > -1
if transitive
REXML::Formatters::Transitive.new( indent, ie_hack )
else
REXML::Formatters::Pretty.new( indent, ie_hack )
end
else
REXML::Formatters::Default.new( ie_hack )
end
formatter.write( self, output )
end

View file

@ -295,14 +295,9 @@ module REXML
def add_element element, attrs=nil
raise "First argument must be either an element name, or an Element object" if element.nil?
el = @elements.add(element)
if attrs.kind_of? Hash
attrs.each do |key, value|
el.attributes[key]=value if key =~ /^xmlns:/
end
attrs.each do |key, value|
el.attributes[key]=value if key !~ /^xmlns:/
end
end
el.attributes[key]=Attribute.new(key,value,self)
end if attrs.kind_of? Hash
el
end
@ -577,7 +572,8 @@ module REXML
# value::
# Required if +key+ is a String, and ignored if the first argument is
# an Attribute. This is a String, and is used as the value
# of the new Attribute.
# of the new Attribute. This should be the unnormalized value of the
# attribute (without entities).
# Returns:: the Attribute added
# e = Element.new 'e'
# e.add_attribute( 'a', 'b' ) #-> <e a='b'/>
@ -649,6 +645,9 @@ module REXML
find_all { |child| child.kind_of? Text }.freeze
end
# == DEPRECATED
# See REXML::Formatters
#
# Writes out this element, and recursively, all children.
# output::
# output an object which supports '<< string'; this is where the
@ -672,37 +671,17 @@ module REXML
# doc.write( out ) #-> doc is written to the string 'out'
# doc.write( $stdout ) #-> doc written to the console
def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
#print "ID:#{indent}"
writer << "<#@expanded_name"
@attributes.each_attribute do |attr|
writer << " "
attr.write( writer, indent )
end unless @attributes.empty?
if @children.empty?
if transitive and indent>-1
writer << "\n"
indent( writer, indent )
elsif ie_hack
writer << " "
end
writer << "/"
Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
formatter = if indent > -1
if transitive
REXML::Formatters::Transitive.new( indent, ie_hack )
else
if transitive and indent>-1 and !@children[0].kind_of? Text
writer << "\n"
indent writer, indent+1
REXML::Formatters::Pretty.new( indent, ie_hack )
end
writer << ">"
write_children( writer, indent, transitive, ie_hack )
writer << "</#{expanded_name}"
else
REXML::Formatters::Default.new( ie_hack )
end
if transitive and indent>-1 and !@children.empty?
writer << "\n"
indent -= 1 if next_sibling.nil?
indent(writer, indent)
end
writer << ">"
formatter.write( self, output )
end
@ -730,29 +709,6 @@ module REXML
return if max>0 and num == max
}
end
# A private helper method
def write_children( writer, indent, transitive, ie_hack )
cr = (indent < 0) ? '' : "\n"
if indent == -1
each { |child| child.write( writer, indent, transitive, ie_hack ) }
else
next_indent = indent+1
last_child=nil
each { |child|
unless child.kind_of? Text or last_child.kind_of? Text or transitive
writer << cr
indent(writer, next_indent)
end
child.write( writer, next_indent, transitive, ie_hack )
last_child = child
}
unless last_child.kind_of? Text or transitive
writer << cr
indent( writer, indent )
end
end
end
end
########################################################################
@ -1006,10 +962,11 @@ module REXML
# name:: an XPath attribute name. Namespaces are relevant here.
# Returns::
# the String value of the matching attribute, or +nil+ if no
# matching attribute was found.
# matching attribute was found. This is the unnormalized value
# (with entities expanded).
#
# doc = Document.new "<a foo:att='1' bar:att='2' att='3'/>"
# doc.root.attributes['att'] #-> '3'
# doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
# doc.root.attributes['att'] #-> '<'
# doc.root.attributes['bar:att'] #-> '2'
def [](name)
attr = get_attribute(name)
@ -1119,7 +1076,15 @@ module REXML
delete attr
return
end
value = Attribute.new(name, value) unless value.kind_of? Attribute
element_document = @element.document
unless value.kind_of? Attribute
if @element.document and @element.document.doctype
value = Text::normalize( value, @element.document.doctype )
else
value = Text::normalize( value, nil )
end
value = Attribute.new(name, value)
end
value.element = @element
old_attr = fetch(value.name, nil)
if old_attr.nil?

View file

@ -58,8 +58,8 @@ module REXML
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
return UTF_16 if /\A\xfe\xff/n =~ str
return UNILE if /\A\xff\xfe/n =~ str
str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
return $1.upcase if $1
str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
return $3.upcase if $3
return UTF_8
end
end

View file

@ -3,9 +3,15 @@
#
module REXML
module Encoding
@@__REXML_encoding_methods = %q~
register( "CP-1252" ) do |o|
class << o
alias encode encode_cp1252
alias decode decode_cp1252
end
end
# Convert from UTF-8
def encode content
def encode_cp1252(content)
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
@ -54,7 +60,7 @@ module REXML
end
# Convert to UTF-8
def decode(str)
def decode_cp1252(str)
array_latin9 = str.unpack('C*')
array_enc = []
array_latin9.each do |num|
@ -93,6 +99,5 @@ module REXML
end
array_enc.pack('U*')
end
~
end
end

View file

@ -3,9 +3,13 @@
#
module REXML
module Encoding
@@__REXML_encoding_methods = %q~
register("ISO-8859-15") do |o|
alias encode to_iso_8859_15
alias decode from_iso_8859_15
end
# Convert from UTF-8
def to_iso_8859_15 content
def to_iso_8859_15(content)
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
@ -64,6 +68,5 @@ module REXML
end
array_enc.pack('U*')
end
~
end
end

View file

@ -89,6 +89,12 @@ module REXML
# Write out a fully formed, correct entity definition (assuming the Entity
# object itself is valid.)
#
# out::
# An object implementing <TT>&lt;&lt;<TT> to which the entity will be
# output
# indent::
# *DEPRECATED* and ignored
def write out, indent=-1
out << '<!ENTITY '
out << '% ' if @reference

View file

@ -0,0 +1,109 @@
module REXML
module Formatters
class Default
# Prints out the XML document with no formatting -- except if id_hack is
# set.
#
# ie_hack::
# If set to true, then inserts whitespace before the close of an empty
# tag, so that IE's bad XML parser doesn't choke.
def initialize( ie_hack=false )
@ie_hack = ie_hack
end
# Writes the node to some output.
#
# node::
# The node to write
# output::
# A class implementing <TT>&lt;&lt;</TT>. Pass in an Output object to
# change the output encoding.
def write( node, output )
case node
when Document
if node.xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
output = Output.new( output, node.xml_decl.encoding )
end
write_document( node, output )
when Element
write_element( node, output )
when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
Attribute, AttlistDecl
node.write( output,-1 )
when Instruction
write_instruction( node, output )
when DocType, XMLDecl
node.write( output )
when Comment
write_comment( node, output )
when CData
write_cdata( node, output )
when Text
write_text( node, output )
else
raise Exception.new("XML FORMATTING ERROR")
end
end
protected
def write_document( node, output )
node.children.each { |child| write( child, output ) }
end
def write_element( node, output )
output << "<#{node.expanded_name}"
node.attributes.each_attribute do |attr|
output << " "
attr.write( output )
end unless node.attributes.empty?
if node.children.empty?
output << " " if @ie_hack
output << "/"
else
output << ">"
node.children.each { |child|
write( child, output )
}
output << "</#{node.expanded_name}"
end
output << ">"
end
def write_text( node, output )
output << node.to_s()
end
def write_comment( node, output )
output << Comment::START
output << node.to_s
output << Comment::STOP
end
def write_cdata( node, output )
output << CData::START
output << node.to_s
output << CData::STOP
end
def write_instruction( node, output )
output << Instruction::START.sub(/\\/u, '')
output << node.target
output << ' '
output << node.content
output << Instruction::STOP.sub(/\\/u, '')
end
end
end
end

View file

@ -0,0 +1,134 @@
require 'rexml/formatters/default'
module REXML
module Formatters
# Pretty-prints an XML document. This destroys whitespace in text nodes
# and will insert carriage returns and indentations.
#
# TODO: Add an option to print attributes on new lines
class Pretty < Default
# If compact is set to true, then the formatter will attempt to use as
# little space as possible
attr_accessor :compact
# The width of a page. Used for formatting text
attr_accessor :width
# Create a new pretty printer.
#
# output::
# An object implementing '<<(String)', to which the output will be written.
# indentation::
# An integer greater than 0. The indentation of each level will be
# this number of spaces. If this is < 1, the behavior of this object
# is undefined. Defaults to 2.
# ie_hack::
# If true, the printer will insert whitespace before closing empty
# tags, thereby allowing Internet Explorer's feeble XML parser to
# function. Defaults to false.
def initialize( indentation=2, ie_hack=false )
@indentation = indentation
@level = 0
@ie_hack = ie_hack
@width = 80
end
protected
def write_element(node, output)
output << ' '*@level
output << "<#{node.expanded_name}"
node.attributes.each_attribute do |attr|
output << " "
attr.write( output )
end unless node.attributes.empty?
if node.children.empty?
if @ie_hack
output << " "
end
output << "/"
else
output << ">"
# If compact and all children are text, and if the formatted output
# is less than the specified width, then try to print everything on
# one line
skip = false
if compact
if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
string = ""
node.children.each { |child| write( child, string, 0 ) }
if string.length + @level < @width
output << string
skip = true
end
end
end
unless skip
output << "\n"
@level += @indentation
node.children.each { |child|
next if child.kind_of?(Text) and child.to_s.strip.length == 0
write( child, output )
output << "\n"
}
@level -= @indentation
output << ' '*@level
end
output << "</#{node.expanded_name}"
end
output << ">"
end
def write_text( node, output )
s = node.to_s()
s.gsub!(/\s/,' ')
s.squeeze!(" ")
s = wrap(s, 80-@level)
s = indent_text(s, @level, " ", true)
output << (' '*@level + s)
end
def write_comment( node, output)
output << ' ' * @level
super
end
def write_cdata( node, output)
output << ' ' * @level
super
end
def write_document( node, output )
# Ok, this is a bit odd. All XML documents have an XML declaration,
# but it may not write itself if the user didn't specifically add it,
# either through the API or in the input document. If it doesn't write
# itself, then we don't need a carriage return... which makes this
# logic more complex.
node.children.each { |child|
next if child == node.children[-1] and child.instance_of?(Text)
unless child == node.children[0] or child.instance_of?(Text) or
(child == node.children[1] and !node.children[0].writethis)
output << "\n"
end
write( child, output )
}
end
private
def indent_text(string, level=1, style="\t", indentfirstline=true)
return string if level < 0
string.gsub(/\n/, "\n#{style*level}")
end
def wrap(string, width)
# Recursivly wrap string at width.
return string if string.length <= width
place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
return string[0,place] + "\n" + wrap(string[place+1..-1], width)
end
end
end
end

View file

@ -0,0 +1,56 @@
require 'rexml/formatters/pretty'
module REXML
module Formatters
# The Transitive formatter writes an XML document that parses to an
# identical document as the source document. This means that no extra
# whitespace nodes are inserted, and whitespace within text nodes is
# preserved. Within these constraints, the document is pretty-printed,
# with whitespace inserted into the metadata to introduce formatting.
#
# Note that this is only useful if the original XML is not already
# formatted. Since this formatter does not alter whitespace nodes, the
# results of formatting already formatted XML will be odd.
class Transitive < Default
def initialize( indentation=2 )
@indentation = indentation
@level = 0
end
protected
def write_element( node, output )
output << "<#{node.expanded_name}"
node.attributes.each_attribute do |attr|
output << " "
attr.write( output )
end unless node.attributes.empty?
output << "\n"
output << ' '*@level
if node.children.empty?
output << "/"
else
output << ">"
# If compact and all children are text, and if the formatted output
# is less than the specified width, then try to print everything on
# one line
skip = false
@level += @indentation
node.children.each { |child|
write( child, output )
}
@level -= @indentation
output << "</#{node.expanded_name}"
output << "\n"
output << ' '*@level
end
output << ">"
end
def write_text( node, output )
output << node.to_s()
end
end
end
end

View file

@ -38,7 +38,11 @@ module REXML
Instruction.new self
end
# == DEPRECATED
# See the rexml/formatters package
#
def write writer, indent=-1, transitive=false, ie_hack=false
Kernel.warn( "#{self.class.name}.write is deprecated" )
indent(writer, indent)
writer << START.sub(/\\/u, '')
writer << @target

View file

@ -18,10 +18,19 @@ module REXML
@parent[ ind - 1 ]
end
def to_s indent=-1
rv = ""
write rv,indent
rv
# indent::
# *DEPRECATED* This parameter is now ignored. See the formatters in the
# REXML::Formatters package for changing the output style.
def to_s indent=nil
unless indent.nil?
Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
f = REXML::Formatters::Pretty.new( indent )
f.write( self, rv, indent )
else
f = REXML::Formatters::Default.new
f.write( self, rv = "" )
end
return rv
end
def indent to, ind

View file

@ -53,7 +53,7 @@ module REXML
STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
ENTITY_START = /^\s*<!ENTITY/
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
ELEMENTDECL_START = /^\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
@ -217,10 +217,10 @@ module REXML
close = md[2]
identity =~ IDENTITY
name = $1
raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
pub_sys = $2.nil? ? nil : $2.strip
long_name = $3.nil? ? nil : $3.strip
uri = $4.nil? ? nil : $4.strip
long_name = $4.nil? ? nil : $4.strip
uri = $6.nil? ? nil : $6.strip
args = [ :start_doctype, name, pub_sys, long_name, uri ]
if close == ">"
@document_status = :after_doctype

View file

@ -94,6 +94,8 @@ module REXML
when :end_document
handle( :end_document )
break
when :start_doctype
handle( :doctype, *event[1..-1])
when :end_doctype
context = context[1]
when :start_element
@ -167,7 +169,7 @@ module REXML
when :entitydecl
@entities[ event[1] ] = event[2] if event.size == 3
handle( *event )
when :processing_instruction, :comment, :doctype, :attlistdecl,
when :processing_instruction, :comment, :attlistdecl,
:elementdecl, :cdata, :notationdecl, :xmldecl
handle( *event )
end

View file

@ -551,7 +551,7 @@ module REXML
end
end
#puts "BEFORE WITH '#{rest}'"
rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/
rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
parsed.concat(n)
return rest
end

View file

@ -10,8 +10,8 @@
#
# Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
# Version:: 3.1.5
# Date:: 2006/250
# Version:: 3.1.7
# Date:: 2007/206
#
# This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc]
@ -20,9 +20,10 @@
# or can be accessed
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML
COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>"
DATE = "2006/250"
VERSION = "3.1.5"
COPYRIGHT = "Copyright © 2001-2007 Sean Russell <ser@germane-software.com>"
DATE = "2007/206"
VERSION = "3.1.7"
REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip
Copyright = COPYRIGHT
Version = VERSION

View file

@ -147,6 +147,9 @@ module REXML
self.encoding = encoding
elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
self.encoding = check_encoding( str )
elsif (0xef == str[0] && 0xbb == str[1])
str += @source.read(1)
str = '' if (0xbf == str[2])
else
@line_break = '>'
end

View file

@ -211,16 +211,17 @@ module REXML
return new_string
end
# == DEPRECATED
# See REXML::Formatters
#
def write( writer, indent=-1, transitive=false, ie_hack=false )
s = to_s()
if not (@parent and @parent.whitespace) then
s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0
s = indent_text(s, indent, @parent.context[:indentstyle], false)
Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
formatter = if indent > -1
REXML::Formatters::Pretty.new( indent )
else
REXML::Formatters::Default.new
end
s.squeeze!(" \n\t") if @parent and !@parent.whitespace
end
writer << s
formatter.write( self, writer )
end
# FIXME

View file

@ -13,7 +13,7 @@ module REXML
STOP = '\?>';
attr_accessor :version, :standalone
attr_reader :writeencoding
attr_reader :writeencoding, :writethis
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
@writethis = true
@ -37,9 +37,14 @@ module REXML
XMLDecl.new(self)
end
def write writer, indent=-1, transitive=false, ie_hack=false
# indent::
# Ignored. There must be no whitespace before an XML declaration
# transitive::
# Ignored
# ie_hack::
# Ignored
def write(writer, indent=-1, transitive=false, ie_hack=false)
return nil unless @writethis or writer.kind_of? Output
indent( writer, indent )
writer << START.sub(/\\/u, '')
if writer.kind_of? Output
writer << " #{content writer.encoding}"

View file

@ -352,7 +352,8 @@ module REXML
when :following_sibling
#puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}"
results = []
for node in nodeset
nodeset.each do |node|
next if node.parent.nil?
all_siblings = node.parent.children
current_index = all_siblings.index( node )
following_siblings = all_siblings[ current_index+1 .. -1 ]
@ -363,13 +364,14 @@ module REXML
when :preceding_sibling
results = []
for node in nodeset
nodeset.each do |node|
next if node.parent.nil?
all_siblings = node.parent.children
current_index = all_siblings.index( node )
preceding_siblings = all_siblings[ 0 .. current_index-1 ].reverse
#results += expr( path_stack.dclone, preceding_siblings )
preceding_siblings = all_siblings[ 0, current_index ].reverse
results += preceding_siblings
end
nodeset = preceding_siblings || []
nodeset = results
node_types = ELEMENTS
when :preceding