mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Merges upstream changes for REXML v3.1.7
http://www.germane-software.com/repos/rexml/tags/3.1.7 r1278@bean: ser | 2007-06-07 00:53:06 -0400 Fixed a double-encoding bug. This was a regression, related to ticket:48. r1292@bean: ser | 2007-07-25 08:19:36 -0400 r1279@bean: ser | 2007-06-09 23:19:02 -0400 Fixes ticket:89 -- encoding CP-1252 was broken. ISO-8859-15 had the same problem. Also in this patch is a fix to merge.rb (unused, but it should at least contain no errors), and a unit test for ticket:88. r1293@bean: ser | 2007-07-25 08:19:37 -0400 r1281@bean: ser | 2007-07-24 11:08:48 -0400 Addresses ticket:85 This is a major rewrite of the XML formatting code. The XML writers have all been extracted out of the classes and put into their own class containers. This makes writing parsers easier, and cleaner. There are three formatters, which correspond to the previous three XML writing modes: REXML::Formatters::Default Prints the XML document exactly as it was parsed REXML::Formatters::Pretty Pretty prints the XML document, destroying whitespace in the document REXML::Formatters::Transitive Pretty prints the XML document, preserving whitespace All of the write() functions have been deprecated (some are still used, but these will also go away) except the write() function on Document, which is left for convenience. To pretty print an XML document the canonical way: formatter = REXML::Formatters::Pretty.new( 5 ) # indent by 5 spaces formatter.write( document, output ) r1294@bean: ser | 2007-07-25 08:19:38 -0400 r1283@bean: ser | 2007-07-24 19:53:30 -0400 This goes with the previous commit. r1295@bean: ser | 2007-07-25 08:19:39 -0400 r1285@bean: ser | 2007-07-24 20:02:07 -0400 And THIS goes with the previous two patches. Dammit. r1296@bean: ser | 2007-07-25 08:19:40 -0400 r1287@bean: ser | 2007-07-24 20:12:25 -0400 Applied patch from Jeff Barczewski. Note that this changes what the values of the name and IDs are from the previous behavior -- the values no longer include the quotes. 
This is the correct behavior, so I'm leaving it in, but it is not backwards compatible. Also fixes the serializer so that it outputs the doctype in a correct format (needed as a result of this change). r1297@bean: ser | 2007-07-25 08:38:38 -0400 Version update git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12844 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
fead3c93e7
commit
1d8c98a486
23 changed files with 1689 additions and 1328 deletions
|
@ -18,16 +18,32 @@ module REXML
|
||||||
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
||||||
|
|
||||||
# Constructor.
|
# Constructor.
|
||||||
|
# FIXME: The parser doesn't catch illegal characters in attributes
|
||||||
|
#
|
||||||
|
# first::
|
||||||
|
# Either: an Attribute, which this new attribute will become a
|
||||||
|
# clone of; or a String, which is the name of this attribute
|
||||||
|
# second::
|
||||||
|
# If +first+ is an Attribute, then this may be an Element, or nil.
|
||||||
|
# If nil, then the Element parent of this attribute is the parent
|
||||||
|
# of the +first+ Attribute. If the first argument is a String,
|
||||||
|
# then this must also be a String, and is the content of the attribute.
|
||||||
|
# If this is the content, it must be fully normalized (contain no
|
||||||
|
# illegal characters).
|
||||||
|
# parent::
|
||||||
|
# Ignored unless +first+ is a String; otherwise, may be the Element
|
||||||
|
# parent of this attribute, or nil.
|
||||||
|
#
|
||||||
#
|
#
|
||||||
# Attribute.new( attribute_to_clone )
|
# Attribute.new( attribute_to_clone )
|
||||||
# Attribute.new( source )
|
# Attribute.new( attribute_to_clone, parent_element )
|
||||||
# Attribute.new( "attr", "attr_value" )
|
# Attribute.new( "attr", "attr_value" )
|
||||||
# Attribute.new( "attr", "attr_value", parent_element )
|
# Attribute.new( "attr", "attr_value", parent_element )
|
||||||
def initialize( first, second=nil, parent=nil )
|
def initialize( first, second=nil, parent=nil )
|
||||||
@normalized = @unnormalized = @element = nil
|
@normalized = @unnormalized = @element = nil
|
||||||
if first.kind_of? Attribute
|
if first.kind_of? Attribute
|
||||||
self.name = first.expanded_name
|
self.name = first.expanded_name
|
||||||
@value = first.value
|
@unnormalized = first.value
|
||||||
if second.kind_of? Element
|
if second.kind_of? Element
|
||||||
@element = second
|
@element = second
|
||||||
else
|
else
|
||||||
|
@ -36,7 +52,7 @@ module REXML
|
||||||
elsif first.kind_of? String
|
elsif first.kind_of? String
|
||||||
@element = parent if parent.kind_of? Element
|
@element = parent if parent.kind_of? Element
|
||||||
self.name = first
|
self.name = first
|
||||||
@value = second.to_s
|
@normalized = second.to_s
|
||||||
else
|
else
|
||||||
raise "illegal argument #{first.class.name} to Attribute constructor"
|
raise "illegal argument #{first.class.name} to Attribute constructor"
|
||||||
end
|
end
|
||||||
|
@ -72,7 +88,7 @@ module REXML
|
||||||
# Returns true if other is an Attribute and has the same name and value,
|
# Returns true if other is an Attribute and has the same name and value,
|
||||||
# false otherwise.
|
# false otherwise.
|
||||||
def ==( other )
|
def ==( other )
|
||||||
other.kind_of?(Attribute) and other.name==name and other.value==@value
|
other.kind_of?(Attribute) and other.name==name and other.value==value
|
||||||
end
|
end
|
||||||
|
|
||||||
# Creates (and returns) a hash from both the name and value
|
# Creates (and returns) a hash from both the name and value
|
||||||
|
@ -87,7 +103,11 @@ module REXML
|
||||||
# b = Attribute.new( "ns:x", "y" )
|
# b = Attribute.new( "ns:x", "y" )
|
||||||
# b.to_string # -> "ns:x='y'"
|
# b.to_string # -> "ns:x='y'"
|
||||||
def to_string
|
def to_string
|
||||||
"#@expanded_name='#{to_s().gsub(/'/, ''')}'"
|
if @element and @element.context and @element.context[:attribute_quote] == :quote
|
||||||
|
%Q^#@expanded_name="#{to_s().gsub(/"/, '"e;')}"^
|
||||||
|
else
|
||||||
|
"#@expanded_name='#{to_s().gsub(/'/, ''')}'"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Returns the attribute value, with entities replaced
|
# Returns the attribute value, with entities replaced
|
||||||
|
@ -100,8 +120,9 @@ module REXML
|
||||||
doctype = doc.doctype if doc
|
doctype = doc.doctype if doc
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@normalized = Text::normalize( @unnormalized, doctype )
|
||||||
@unnormalized = nil
|
@unnormalized = nil
|
||||||
@normalized = Text::normalize( @value, doctype )
|
@normalized
|
||||||
end
|
end
|
||||||
|
|
||||||
# Returns the UNNORMALIZED value of this attribute. That is, entities
|
# Returns the UNNORMALIZED value of this attribute. That is, entities
|
||||||
|
@ -113,8 +134,9 @@ module REXML
|
||||||
doc = @element.document
|
doc = @element.document
|
||||||
doctype = doc.doctype if doc
|
doctype = doc.doctype if doc
|
||||||
end
|
end
|
||||||
|
@unnormalized = Text::unnormalize( @normalized, doctype )
|
||||||
@normalized = nil
|
@normalized = nil
|
||||||
@unnormalized = Text::unnormalize( @value, doctype )
|
@unnormalized
|
||||||
end
|
end
|
||||||
|
|
||||||
# Returns a copy of this attribute
|
# Returns a copy of this attribute
|
||||||
|
|
|
@ -39,31 +39,26 @@ module REXML
|
||||||
@string
|
@string
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# == DEPRECATED
|
||||||
|
# See the rexml/formatters package
|
||||||
|
#
|
||||||
# Generates XML output of this object
|
# Generates XML output of this object
|
||||||
#
|
#
|
||||||
# output::
|
# output::
|
||||||
# Where to write the string. Defaults to $stdout
|
# Where to write the string. Defaults to $stdout
|
||||||
# indent::
|
# indent::
|
||||||
# An integer. If -1, no indenting will be used; otherwise, the
|
# The amount to indent this node by
|
||||||
# indentation will be this number of spaces, and children will be
|
|
||||||
# indented an additional amount. Defaults to -1.
|
|
||||||
# transitive::
|
# transitive::
|
||||||
# If transitive is true and indent is >= 0, then the output will be
|
# Ignored
|
||||||
# pretty-printed in such a way that the added whitespace does not affect
|
|
||||||
# the absolute *value* of the document -- that is, it leaves the value
|
|
||||||
# and number of Text nodes in the document unchanged.
|
|
||||||
# ie_hack::
|
# ie_hack::
|
||||||
# Internet Explorer is the worst piece of crap to have ever been
|
# Ignored
|
||||||
# written, with the possible exception of Windows itself. Since IE is
|
|
||||||
# unable to parse proper XML, we have to provide a hack to generate XML
|
|
||||||
# that IE's limited abilities can handle. This hack inserts a space
|
|
||||||
# before the /> on empty tags.
|
|
||||||
#
|
#
|
||||||
# _Examples_
|
# _Examples_
|
||||||
# c = CData.new( " Some text " )
|
# c = CData.new( " Some text " )
|
||||||
# c.write( $stdout ) #-> <![CDATA[ Some text ]]>
|
# c.write( $stdout ) #-> <![CDATA[ Some text ]]>
|
||||||
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
|
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
|
||||||
#indent( output, indent ) unless transitive
|
Kernel.warn( "#{self.class.name}.write is deprecated" )
|
||||||
|
indent( output, indent )
|
||||||
output << START
|
output << START
|
||||||
output << @string
|
output << @string
|
||||||
output << STOP
|
output << STOP
|
||||||
|
|
|
@ -34,6 +34,9 @@ module REXML
|
||||||
Comment.new self
|
Comment.new self
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# == DEPRECATED
|
||||||
|
# See REXML::Formatters
|
||||||
|
#
|
||||||
# output::
|
# output::
|
||||||
# Where to write the string
|
# Where to write the string
|
||||||
# indent::
|
# indent::
|
||||||
|
@ -45,6 +48,7 @@ module REXML
|
||||||
# ie_hack::
|
# ie_hack::
|
||||||
# Needed for conformity to the child API, but not used by this class.
|
# Needed for conformity to the child API, but not used by this class.
|
||||||
def write( output, indent=-1, transitive=false, ie_hack=false )
|
def write( output, indent=-1, transitive=false, ie_hack=false )
|
||||||
|
Kernel.warn("Comment.write is deprecated. See REXML::Formatters")
|
||||||
indent( output, indent )
|
indent( output, indent )
|
||||||
output << START
|
output << START
|
||||||
output << @string
|
output << @string
|
||||||
|
|
|
@ -98,38 +98,30 @@ module REXML
|
||||||
# output::
|
# output::
|
||||||
# Where to write the string
|
# Where to write the string
|
||||||
# indent::
|
# indent::
|
||||||
# An integer. If -1, no indenting will be used; otherwise, the
|
# An integer. If -1, no indentation will be used; otherwise, the
|
||||||
# indentation will be this number of spaces, and children will be
|
# indentation will be this number of spaces, and children will be
|
||||||
# indented an additional amount.
|
# indented an additional amount.
|
||||||
# transitive::
|
# transitive::
|
||||||
# If transitive is true and indent is >= 0, then the output will be
|
# Ignored
|
||||||
# pretty-printed in such a way that the added whitespace does not affect
|
|
||||||
# the absolute *value* of the document -- that is, it leaves the value
|
|
||||||
# and number of Text nodes in the document unchanged.
|
|
||||||
# ie_hack::
|
# ie_hack::
|
||||||
# Internet Explorer is the worst piece of crap to have ever been
|
# Ignored
|
||||||
# written, with the possible exception of Windows itself. Since IE is
|
|
||||||
# unable to parse proper XML, we have to provide a hack to generate XML
|
|
||||||
# that IE's limited abilities can handle. This hack inserts a space
|
|
||||||
# before the /> on empty tags.
|
|
||||||
#
|
|
||||||
def write( output, indent=0, transitive=false, ie_hack=false )
|
def write( output, indent=0, transitive=false, ie_hack=false )
|
||||||
|
f = REXML::Formatters::Default.new
|
||||||
indent( output, indent )
|
indent( output, indent )
|
||||||
output << START
|
output << START
|
||||||
output << ' '
|
output << ' '
|
||||||
output << @name
|
output << @name
|
||||||
output << " #@external_id" if @external_id
|
output << " #@external_id" if @external_id
|
||||||
output << " #@long_name" if @long_name
|
output << " #{@long_name.inspect}" if @long_name
|
||||||
output << " #@uri" if @uri
|
output << " #{@uri.inspect}" if @uri
|
||||||
unless @children.empty?
|
unless @children.empty?
|
||||||
next_indent = indent + 1
|
next_indent = indent + 1
|
||||||
output << ' ['
|
output << ' ['
|
||||||
child = nil # speed
|
child = nil # speed
|
||||||
@children.each { |child|
|
@children.each { |child|
|
||||||
output << "\n"
|
output << "\n"
|
||||||
child.write( output, next_indent )
|
f.write( child, output )
|
||||||
}
|
}
|
||||||
#output << ' '*next_indent
|
|
||||||
output << "\n]"
|
output << "\n]"
|
||||||
end
|
end
|
||||||
output << STOP
|
output << STOP
|
||||||
|
@ -219,8 +211,10 @@ module REXML
|
||||||
@string+'>'
|
@string+'>'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# == DEPRECATED
|
||||||
|
# See REXML::Formatters
|
||||||
|
#
|
||||||
def write( output, indent )
|
def write( output, indent )
|
||||||
output << (' '*indent) if indent > 0
|
|
||||||
output << to_s
|
output << to_s
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -264,7 +258,6 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
def write( output, indent=-1 )
|
def write( output, indent=-1 )
|
||||||
output << (' '*indent) if indent > 0
|
|
||||||
output << to_s
|
output << to_s
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -31,9 +31,6 @@ module REXML
|
||||||
# to be sources of valid XML documents.
|
# to be sources of valid XML documents.
|
||||||
# @param context if supplied, contains the context of the document;
|
# @param context if supplied, contains the context of the document;
|
||||||
# this should be a Hash.
|
# this should be a Hash.
|
||||||
# NOTE that I'm not sure what the context is for; I cloned it out of
|
|
||||||
# the Electric XML API (in which it also seems to do nothing), and it
|
|
||||||
# is now legacy. It may do something, someday... it may disappear.
|
|
||||||
def initialize( source = nil, context = {} )
|
def initialize( source = nil, context = {} )
|
||||||
super()
|
super()
|
||||||
@context = context
|
@context = context
|
||||||
|
@ -142,14 +139,53 @@ module REXML
|
||||||
xml_decl().stand_alone?
|
xml_decl().stand_alone?
|
||||||
end
|
end
|
||||||
|
|
||||||
# Write the XML tree out, optionally with indent. This writes out the
|
# Write the XML tree out. This writes the entire XML document, including
|
||||||
# entire XML document, including XML declarations, doctype declarations,
|
# declarations and processing instructions.
|
||||||
# and processing instructions (if any are given).
|
#
|
||||||
# A controversial point is whether Document should always write the XML
|
# A controversial point is whether Document should always write the XML
|
||||||
# declaration (<?xml version='1.0'?>) whether or not one is given by the
|
# declaration (<?xml version='1.0'?>) whether or not one is given by the
|
||||||
# user (or source document). REXML does not write one if one was not
|
# user (or source document). REXML does not write one if one was not
|
||||||
# specified, because it adds unnecessary bandwidth to applications such
|
# specified, because it adds unnecessary bandwidth to applications such
|
||||||
# as XML-RPC.
|
# as XML-RPC.
|
||||||
|
#
|
||||||
|
# _Examples_
#   Document.new("<a><b/></a>").serialize
#
#   formatter = REXML::Formatters::Transitive.new
#   Document.new("<a><b/></a>").serialize( formatter )
#
# formatter::
#   An instance of one of the rexml/formatters classes (any object that
#   responds to <tt>write( node, output )</tt>).  If none is given, a
#   REXML::Formatters::Pretty formatter is used.  The document is always
#   written to $stdout.
def serialize( formatter = nil )
  formatter = REXML::Formatters::Pretty.new if formatter.nil?
  # No Output wrapper is built here: Formatters::Default#write already wraps
  # the destination when the document's declared encoding is not UTF-8.
  # NOTE(review): a custom formatter that does not derive from Default must
  # handle transcoding itself -- confirm this is acceptable.
  formatter.write( self, $stdout )
end
|
||||||
|
|
||||||
|
# Write the XML tree out, optionally with indent. This writes out the
|
||||||
|
# entire XML document, including XML declarations, doctype declarations,
|
||||||
|
# and processing instructions (if any are given).
|
||||||
|
#
|
||||||
|
# A controversial point is whether Document should always write the XML
|
||||||
|
# declaration (<?xml version='1.0'?>) whether or not one is given by the
|
||||||
|
# user (or source document). REXML does not write one if one was not
|
||||||
|
# specified, because it adds unnecessary bandwidth to applications such
|
||||||
|
# as XML-RPC.
|
||||||
|
#
|
||||||
|
# See also the classes in the rexml/formatters package for the proper way
|
||||||
|
# to change the default formatting of XML output
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# output::
|
# output::
|
||||||
|
@ -160,7 +196,7 @@ module REXML
|
||||||
# indentation will be twice this number of spaces, and children will be
|
# indentation will be twice this number of spaces, and children will be
|
||||||
# indented an additional amount. For a value of 3, every item will be
|
# indented an additional amount. For a value of 3, every item will be
|
||||||
# indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
|
# indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
|
||||||
# transitive::
|
# trans::
|
||||||
# If transitive is true and indent is >= 0, then the output will be
|
# If transitive is true and indent is >= 0, then the output will be
|
||||||
# pretty-printed in such a way that the added whitespace does not affect
|
# pretty-printed in such a way that the added whitespace does not affect
|
||||||
# the absolute *value* of the document -- that is, it leaves the value
|
# the absolute *value* of the document -- that is, it leaves the value
|
||||||
|
@ -171,14 +207,20 @@ module REXML
|
||||||
# unable to parse proper XML, we have to provide a hack to generate XML
|
# unable to parse proper XML, we have to provide a hack to generate XML
|
||||||
# that IE's limited abilities can handle. This hack inserts a space
|
# that IE's limited abilities can handle. This hack inserts a space
|
||||||
# before the /> on empty tags. Defaults to false
|
# before the /> on empty tags. Defaults to false
|
||||||
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
|
def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
|
||||||
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
|
if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
|
||||||
@children.each { |node|
|
output = Output.new( output, xml_decl.encoding )
|
||||||
indent( output, indent ) if node.node_type == :element
|
end
|
||||||
if node.write( output, indent, transitive, ie_hack )
|
formatter = if indent > -1
|
||||||
output << "\n" unless indent<0 or node == @children[-1]
|
if transitive
|
||||||
|
REXML::Formatters::Transitive.new( indent, ie_hack )
|
||||||
|
else
|
||||||
|
REXML::Formatters::Pretty.new( indent, ie_hack )
|
||||||
|
end
|
||||||
|
else
|
||||||
|
REXML::Formatters::Default.new( ie_hack )
|
||||||
end
|
end
|
||||||
}
|
formatter.write( self, output )
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
2121
lib/rexml/element.rb
2121
lib/rexml/element.rb
File diff suppressed because it is too large
Load diff
|
@ -58,8 +58,8 @@ module REXML
|
||||||
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
|
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
|
||||||
return UTF_16 if /\A\xfe\xff/n =~ str
|
return UTF_16 if /\A\xfe\xff/n =~ str
|
||||||
return UNILE if /\A\xff\xfe/n =~ str
|
return UNILE if /\A\xff\xfe/n =~ str
|
||||||
str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
|
str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
|
||||||
return $1.upcase if $1
|
return $3.upcase if $3
|
||||||
return UTF_8
|
return UTF_8
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,9 +3,15 @@
|
||||||
#
|
#
|
||||||
module REXML
|
module REXML
|
||||||
module Encoding
|
module Encoding
|
||||||
@@__REXML_encoding_methods = %q~
|
register( "CP-1252" ) do |o|
|
||||||
|
class << o
|
||||||
|
alias encode encode_cp1252
|
||||||
|
alias decode decode_cp1252
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# Convert from UTF-8
|
# Convert from UTF-8
|
||||||
def encode content
|
def encode_cp1252(content)
|
||||||
array_utf8 = content.unpack('U*')
|
array_utf8 = content.unpack('U*')
|
||||||
array_enc = []
|
array_enc = []
|
||||||
array_utf8.each do |num|
|
array_utf8.each do |num|
|
||||||
|
@ -54,7 +60,7 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
# Convert to UTF-8
|
# Convert to UTF-8
|
||||||
def decode(str)
|
def decode_cp1252(str)
|
||||||
array_latin9 = str.unpack('C*')
|
array_latin9 = str.unpack('C*')
|
||||||
array_enc = []
|
array_enc = []
|
||||||
array_latin9.each do |num|
|
array_latin9.each do |num|
|
||||||
|
@ -93,6 +99,5 @@ module REXML
|
||||||
end
|
end
|
||||||
array_enc.pack('U*')
|
array_enc.pack('U*')
|
||||||
end
|
end
|
||||||
~
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,9 +3,13 @@
|
||||||
#
|
#
|
||||||
module REXML
|
module REXML
|
||||||
module Encoding
|
module Encoding
|
||||||
@@__REXML_encoding_methods = %q~
|
register("ISO-8859-15") do |o|
|
||||||
|
alias encode to_iso_8859_15
|
||||||
|
alias decode from_iso_8859_15
|
||||||
|
end
|
||||||
|
|
||||||
# Convert from UTF-8
|
# Convert from UTF-8
|
||||||
def to_iso_8859_15 content
|
def to_iso_8859_15(content)
|
||||||
array_utf8 = content.unpack('U*')
|
array_utf8 = content.unpack('U*')
|
||||||
array_enc = []
|
array_enc = []
|
||||||
array_utf8.each do |num|
|
array_utf8.each do |num|
|
||||||
|
@ -64,6 +68,5 @@ module REXML
|
||||||
end
|
end
|
||||||
array_enc.pack('U*')
|
array_enc.pack('U*')
|
||||||
end
|
end
|
||||||
~
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -89,6 +89,12 @@ module REXML
|
||||||
|
|
||||||
# Write out a fully formed, correct entity definition (assuming the Entity
|
# Write out a fully formed, correct entity definition (assuming the Entity
|
||||||
# object itself is valid.)
|
# object itself is valid.)
|
||||||
|
#
|
||||||
|
# out::
|
||||||
|
# An object implementing <TT><<</TT> to which the entity will be
|
||||||
|
# output
|
||||||
|
# indent::
|
||||||
|
# *DEPRECATED* and ignored
|
||||||
def write out, indent=-1
|
def write out, indent=-1
|
||||||
out << '<!ENTITY '
|
out << '<!ENTITY '
|
||||||
out << '% ' if @reference
|
out << '% ' if @reference
|
||||||
|
|
109
lib/rexml/formatters/default.rb
Normal file
109
lib/rexml/formatters/default.rb
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
module REXML
  module Formatters
    # Writes a node tree to an output object without adding any formatting
    # of its own -- the only optional addition is the ie_hack space inside
    # empty element tags.
    class Default
      # ie_hack::
      #   If true, a single space is written before the "/" that closes an
      #   empty tag, so that IE's bad XML parser doesn't choke.
      def initialize( ie_hack=false )
        @ie_hack = ie_hack
      end

      # Writes +node+ to +output+, choosing the writer by the node's class.
      #
      # node::
      #   The node to write
      # output::
      #   An object implementing <TT><<</TT>.  Pass in an Output object to
      #   change the output encoding.
      #
      # Raises an Exception ("XML FORMATTING ERROR") when the node matches
      # none of the known classes.
      #
      # NOTE(review): the branch order is kept exactly as it was; it matters
      # if any of these classes inherit from one another (presumably
      # Document < Element and CData < Text -- confirm).
      def write( node, output )
        case node
        when Document
          # Wrap the destination once so that everything below is transcoded
          # to the encoding the document declares.
          encoding = node.xml_decl.encoding
          unless encoding == "UTF-8" || output.kind_of?(Output)
            output = Output.new( output, encoding )
          end
          write_document( node, output )
        when Element
          write_element( node, output )
        when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
             Attribute, AttlistDecl
          # These node types still serialize themselves.
          node.write( output,-1 )
        when Instruction
          write_instruction( node, output )
        when DocType, XMLDecl
          node.write( output )
        when Comment
          write_comment( node, output )
        when CData
          write_cdata( node, output )
        when Text
          write_text( node, output )
        else
          raise Exception.new("XML FORMATTING ERROR")
        end
      end

      protected

      # Writes every child of the document, in order.
      def write_document( node, output )
        node.children.each do |child|
          write( child, output )
        end
      end

      # Writes the start tag with its attributes, then either closes the tag
      # as an empty element or writes the children followed by the end tag.
      def write_element( node, output )
        tag = node.expanded_name
        attributes = node.attributes
        output << "<#{tag}"

        unless attributes.empty?
          attributes.each_attribute do |attr|
            output << " "
            attr.write( output )
          end
        end

        kids = node.children
        if kids.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          kids.each do |child|
            write( child, output )
          end
          output << "</#{tag}"
        end
        output << ">"
      end

      # Writes the text node's string form unchanged.
      def write_text( node, output )
        output << node.to_s()
      end

      # Writes the comment body between the comment delimiters.  The pieces
      # are appended one at a time rather than chained, because +output+ is
      # only required to implement <<, not to return itself from it.
      def write_comment( node, output )
        output << Comment::START
        output << node.to_s
        output << Comment::STOP
      end

      # Writes the CDATA content between the CDATA delimiters.
      def write_cdata( node, output )
        output << CData::START
        output << node.to_s
        output << CData::STOP
      end

      # Writes a processing instruction.  The first backslash is removed from
      # Instruction::START and Instruction::STOP before they are written.
      def write_instruction( node, output )
        opening = Instruction::START.sub(/\\/u, '')
        closing = Instruction::STOP.sub(/\\/u, '')
        output << opening
        output << node.target
        output << ' '
        output << node.content
        output << closing
      end
    end
  end
end
|
134
lib/rexml/formatters/pretty.rb
Normal file
134
lib/rexml/formatters/pretty.rb
Normal file
|
@ -0,0 +1,134 @@
|
||||||
|
require 'rexml/formatters/default'
|
||||||
|
|
||||||
|
module REXML
  module Formatters
    # Pretty-prints an XML document.  This destroys whitespace in text nodes
    # and will insert carriage returns and indentations.
    #
    # TODO: Add an option to print attributes on new lines
    class Pretty < Default

      # If compact is set to true, then the formatter will attempt to use as
      # little space as possible
      attr_accessor :compact
      # The width of a page.  Used for formatting text
      attr_accessor :width

      # Create a new pretty printer.
      #
      # indentation::
      #   An integer greater than 0.  The indentation of each level will be
      #   this number of spaces.  If this is < 1, the behavior of this object
      #   is undefined.  Defaults to 2.
      # ie_hack::
      #   If true, the printer will insert whitespace before closing empty
      #   tags, thereby allowing Internet Explorer's feeble XML parser to
      #   function. Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
        @width = 80
        @compact = false
      end

      protected

      # Writes an element at the current indentation level.  Children are
      # written one per line, one level deeper, unless compact mode managed
      # to fit an all-text body on the same line as the tags.
      def write_element(node, output)
        output << ' '*@level
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          unless write_compact_text( node, output )
            output << "\n"
            @level += @indentation
            node.children.each { |child|
              # Whitespace-only text carries no content worth a line.
              next if child.kind_of?(Text) and child.to_s.strip.length == 0
              write( child, output )
              output << "\n"
            }
            @level -= @indentation
            output << ' '*@level
          end
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes a text node with all whitespace runs collapsed to one space,
      # wrapped to the page width and indented to the current level.
      def write_text( node, output )
        # Non-destructive calls on purpose: the string returned by node.to_s
        # may still be held by the node (NOTE(review): confirm against
        # Text#to_s), so it must not be edited in place.
        s = node.to_s().gsub(/\s/,' ').squeeze(" ")
        s = wrap(s, @width - @level)
        s = indent_text(s, @level, " ", true)
        output << (' '*@level + s)
      end

      # Writes an indented comment.
      def write_comment( node, output)
        output << ' ' * @level
        super
      end

      # Writes an indented CDATA section.
      def write_cdata( node, output)
        output << ' ' * @level
        super
      end

      # Writes the document's children separated by newlines.
      #
      # Ok, this is a bit odd.  All XML documents have an XML declaration,
      # but it may not write itself if the user didn't specifically add it,
      # either through the API or in the input document.  If it doesn't write
      # itself, then we don't need a carriage return... which makes this
      # logic more complex.
      def write_document( node, output )
        children = node.children
        first = children.first
        # Only nodes that expose #writethis can suppress their own output;
        # any other first child always writes itself.
        first_is_silent = first.respond_to?(:writethis) && !first.writethis
        last_index = children.length - 1
        # Positions are compared by index rather than ==, so two siblings
        # with equal content are not mistaken for one another.
        children.each_with_index { |child, index|
          is_text = child.instance_of?(Text)
          next if index == last_index and is_text
          unless index == 0 or is_text or (index == 1 and first_is_silent)
            output << "\n"
          end
          write( child, output )
        }
      end

      private

      # In compact mode, tries to write an all-text element body on the same
      # line as its tags.  Returns true when the body was written, false when
      # the caller has to fall back to one child per line.
      def write_compact_text( node, output )
        return false unless compact
        return false unless node.children.all? { |child| child.kind_of?(Text) }

        string = "".dup
        saved_level = @level
        # The text is rendered at level 0 so that no indentation ends up in
        # the middle of the line.
        @level = 0
        begin
          node.children.each { |child| write( child, string ) }
        ensure
          @level = saved_level
        end
        return false unless string.length + @level < @width

        output << string
        true
      end

      # Indents every line after the first by +level+ repetitions of +style+.
      # +indentfirstline+ is accepted but not used.
      def indent_text(string, level=1, style="\t", indentfirstline=true)
        return string if level < 0
        string.gsub(/\n/, "\n#{style*level}")
      end

      # Breaks +string+ into lines of at most +width+ characters, splitting at
      # the last space before the cutoff.  A stretch with no space before the
      # cutoff cannot be broken and is left as one (over-long) line.
      def wrap(string, width)
        lines = []
        while string.length > width
          place = string.rindex(' ', width) # last ' ' at or before the cutoff
          break if place.nil?
          lines << string[0, place]
          string = string[place+1..-1]
        end
        lines << string
        lines.join("\n")
      end

    end
  end
end
|
||||||
|
|
56
lib/rexml/formatters/transitive.rb
Normal file
56
lib/rexml/formatters/transitive.rb
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
require 'rexml/formatters/pretty'
|
||||||
|
|
||||||
|
module REXML
  module Formatters
    # The Transitive formatter writes an XML document that parses to an
    # identical document as the source document.  This means that no extra
    # whitespace nodes are inserted, and whitespace within text nodes is
    # preserved.  Within these constraints, the document is pretty-printed,
    # with whitespace inserted into the metadata to introduce formatting.
    #
    # Note that this is only useful if the original XML is not already
    # formatted.  Since this formatter does not alter whitespace nodes, the
    # results of formatting already formatted XML will be odd.
    class Transitive < Default
      # indentation::
      #   The number of spaces added per nesting level.  Defaults to 2.
      # ie_hack::
      #   If true, a space is written before the "/" that closes an empty
      #   tag.  Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
      end

      protected

      # Writes an element.  The line break and indentation are placed inside
      # the tags, just before the closing ">", so that no whitespace is added
      # to the element's content.
      def write_element( node, output )
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        output << "\n"
        output << ' '*@level
        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          @level += @indentation
          node.children.each { |child|
            write( child, output )
          }
          @level -= @indentation
          output << "</#{node.expanded_name}"
          output << "\n"
          output << ' '*@level
        end
        output << ">"
      end

      # Writes the text node's string form unchanged.
      def write_text( node, output )
        output << node.to_s()
      end
    end
  end
end
|
|
@ -38,7 +38,11 @@ module REXML
|
||||||
Instruction.new self
|
Instruction.new self
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# == DEPRECATED
|
||||||
|
# See the rexml/formatters package
|
||||||
|
#
|
||||||
def write writer, indent=-1, transitive=false, ie_hack=false
|
def write writer, indent=-1, transitive=false, ie_hack=false
|
||||||
|
Kernel.warn( "#{self.class.name}.write is deprecated" )
|
||||||
indent(writer, indent)
|
indent(writer, indent)
|
||||||
writer << START.sub(/\\/u, '')
|
writer << START.sub(/\\/u, '')
|
||||||
writer << @target
|
writer << @target
|
||||||
|
|
|
@ -18,10 +18,19 @@ module REXML
|
||||||
@parent[ ind - 1 ]
|
@parent[ ind - 1 ]
|
||||||
end
|
end
|
||||||
|
|
||||||
# indent::
#   *DEPRECATED* Passing a value emits a deprecation warning and renders
#   the node with REXML::Formatters::Pretty.new( indent ).  When omitted,
#   REXML::Formatters::Default is used.  See the formatters in the
#   REXML::Formatters package for changing the output style.
def to_s indent=nil
  if indent.nil?
    formatter = REXML::Formatters::Default.new
  else
    Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
    formatter = REXML::Formatters::Pretty.new( indent )
  end
  # The buffer must exist before either formatter writes into it, and the
  # formatter receives only the node and the output target.
  rv = ""
  formatter.write( self, rv )
  return rv
end
|
||||||
|
|
||||||
def indent to, ind
|
def indent to, ind
|
||||||
|
|
|
@ -53,7 +53,7 @@ module REXML
|
||||||
STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
|
STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
|
||||||
|
|
||||||
ENTITY_START = /^\s*<!ENTITY/
|
ENTITY_START = /^\s*<!ENTITY/
|
||||||
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
|
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
|
||||||
ELEMENTDECL_START = /^\s*<!ELEMENT/um
|
ELEMENTDECL_START = /^\s*<!ELEMENT/um
|
||||||
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
|
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
|
||||||
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
|
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
|
||||||
|
@ -217,10 +217,10 @@ module REXML
|
||||||
close = md[2]
|
close = md[2]
|
||||||
identity =~ IDENTITY
|
identity =~ IDENTITY
|
||||||
name = $1
|
name = $1
|
||||||
raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
|
raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
|
||||||
pub_sys = $2.nil? ? nil : $2.strip
|
pub_sys = $2.nil? ? nil : $2.strip
|
||||||
long_name = $3.nil? ? nil : $3.strip
|
long_name = $4.nil? ? nil : $4.strip
|
||||||
uri = $4.nil? ? nil : $4.strip
|
uri = $6.nil? ? nil : $6.strip
|
||||||
args = [ :start_doctype, name, pub_sys, long_name, uri ]
|
args = [ :start_doctype, name, pub_sys, long_name, uri ]
|
||||||
if close == ">"
|
if close == ">"
|
||||||
@document_status = :after_doctype
|
@document_status = :after_doctype
|
||||||
|
|
|
@ -94,6 +94,8 @@ module REXML
|
||||||
when :end_document
|
when :end_document
|
||||||
handle( :end_document )
|
handle( :end_document )
|
||||||
break
|
break
|
||||||
|
when :start_doctype
|
||||||
|
handle( :doctype, *event[1..-1])
|
||||||
when :end_doctype
|
when :end_doctype
|
||||||
context = context[1]
|
context = context[1]
|
||||||
when :start_element
|
when :start_element
|
||||||
|
@ -167,7 +169,7 @@ module REXML
|
||||||
when :entitydecl
|
when :entitydecl
|
||||||
@entities[ event[1] ] = event[2] if event.size == 3
|
@entities[ event[1] ] = event[2] if event.size == 3
|
||||||
handle( *event )
|
handle( *event )
|
||||||
when :processing_instruction, :comment, :doctype, :attlistdecl,
|
when :processing_instruction, :comment, :attlistdecl,
|
||||||
:elementdecl, :cdata, :notationdecl, :xmldecl
|
:elementdecl, :cdata, :notationdecl, :xmldecl
|
||||||
handle( *event )
|
handle( *event )
|
||||||
end
|
end
|
||||||
|
|
|
@ -551,7 +551,7 @@ module REXML
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
#puts "BEFORE WITH '#{rest}'"
|
#puts "BEFORE WITH '#{rest}'"
|
||||||
rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/
|
rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
|
||||||
parsed.concat(n)
|
parsed.concat(n)
|
||||||
return rest
|
return rest
|
||||||
end
|
end
|
||||||
|
|
|
@ -10,8 +10,8 @@
|
||||||
#
|
#
|
||||||
# Main page:: http://www.germane-software.com/software/rexml
|
# Main page:: http://www.germane-software.com/software/rexml
|
||||||
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
|
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
|
||||||
# Version:: 3.1.5
|
# Version:: 3.1.7
|
||||||
# Date:: 2006/250
|
# Date:: 2007/206
|
||||||
#
|
#
|
||||||
# This API documentation can be downloaded from the REXML home page, or can
|
# This API documentation can be downloaded from the REXML home page, or can
|
||||||
# be accessed online[http://www.germane-software.com/software/rexml_doc]
|
# be accessed online[http://www.germane-software.com/software/rexml_doc]
|
||||||
|
@ -20,9 +20,10 @@
|
||||||
# or can be accessed
|
# or can be accessed
|
||||||
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
|
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
|
||||||
module REXML
|
module REXML
|
||||||
COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>"
|
COPYRIGHT = "Copyright © 2001-2007 Sean Russell <ser@germane-software.com>"
|
||||||
DATE = "2006/250"
|
DATE = "2007/206"
|
||||||
VERSION = "3.1.5"
|
VERSION = "3.1.7"
|
||||||
|
REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip
|
||||||
|
|
||||||
Copyright = COPYRIGHT
|
Copyright = COPYRIGHT
|
||||||
Version = VERSION
|
Version = VERSION
|
||||||
|
|
|
@ -1,139 +1,139 @@
|
||||||
require 'rexml/encoding'
|
require 'rexml/encoding'
|
||||||
|
|
||||||
module REXML
|
module REXML
|
||||||
# Generates Source-s. USE THIS CLASS.
|
# Generates Source-s. USE THIS CLASS.
|
||||||
class SourceFactory
  # Generates a Source object
  # @param arg Either a String, an IO-like object, or a Source
  # @return a new Source for a String; a new IOSource for an object that
  #   responds to read, readline, nil? and eof?; or arg itself when it is
  #   already a Source
  # @raise RuntimeError if arg is none of the above
  def SourceFactory::create_from(arg)
    if arg.kind_of? String
      Source.new(arg)
    elsif [:read, :readline, :nil?, :eof?].all? { |m| arg.respond_to? m }
      IOSource.new(arg)
    elsif arg.kind_of? Source
      arg
    else
      # Name the class of the rejected argument in the message.
      raise "#{arg.class} is not a valid input stream. It must walk \n"+
        "like either a String, IO, or Source."
    end
  end
end
|
||||||
|
|
||||||
# A Source can be searched for patterns, and wraps buffers and other
|
# A Source can be searched for patterns, and wraps buffers and other
|
||||||
# objects and provides consumption of text
|
# objects and provides consumption of text
|
||||||
class Source
|
class Source
|
||||||
include Encoding
|
include Encoding
|
||||||
# The current buffer (what we're going to read next)
|
# The current buffer (what we're going to read next)
|
||||||
attr_reader :buffer
|
attr_reader :buffer
|
||||||
# The line number of the last consumed text
|
# The line number of the last consumed text
|
||||||
attr_reader :line
|
attr_reader :line
|
||||||
attr_reader :encoding
|
attr_reader :encoding
|
||||||
|
|
||||||
# Constructor
|
# Constructor
|
||||||
# @param arg must be a String, and should be a valid XML document
|
# @param arg must be a String, and should be a valid XML document
|
||||||
# @param encoding if non-null, sets the encoding of the source to this
|
# @param encoding if non-null, sets the encoding of the source to this
|
||||||
# value, overriding all encoding detection
|
# value, overriding all encoding detection
|
||||||
def initialize(arg, encoding=nil)
|
def initialize(arg, encoding=nil)
|
||||||
@orig = @buffer = arg
|
@orig = @buffer = arg
|
||||||
if encoding
|
if encoding
|
||||||
self.encoding = encoding
|
self.encoding = encoding
|
||||||
else
|
else
|
||||||
self.encoding = check_encoding( @buffer )
|
self.encoding = check_encoding( @buffer )
|
||||||
end
|
end
|
||||||
@line = 0
|
@line = 0
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
# Inherited from Encoding
|
# Inherited from Encoding
|
||||||
# Overridden to support optimized en/decoding
|
# Overridden to support optimized en/decoding
|
||||||
def encoding=(enc)
|
def encoding=(enc)
|
||||||
return unless super
|
return unless super
|
||||||
@line_break = encode( '>' )
|
@line_break = encode( '>' )
|
||||||
if enc != UTF_8
|
if enc != UTF_8
|
||||||
@buffer = decode(@buffer)
|
@buffer = decode(@buffer)
|
||||||
@to_utf = true
|
@to_utf = true
|
||||||
else
|
else
|
||||||
@to_utf = false
|
@to_utf = false
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Scans the source for a given pattern. Note, that this is not your
|
# Scans the source for a given pattern. Note, that this is not your
|
||||||
# usual scan() method. For one thing, the pattern argument has some
|
# usual scan() method. For one thing, the pattern argument has some
|
||||||
# requirements; for another, the source can be consumed. You can easily
|
# requirements; for another, the source can be consumed. You can easily
|
||||||
# confuse this method. Originally, the patterns were easier
|
# confuse this method. Originally, the patterns were easier
|
||||||
# to construct and this method more robust, because this method
|
# to construct and this method more robust, because this method
|
||||||
# generated search regexes on the fly; however, this was
|
# generated search regexes on the fly; however, this was
|
||||||
# computationally expensive and slowed down the entire REXML package
|
# computationally expensive and slowed down the entire REXML package
|
||||||
# considerably, since this is by far the most commonly called method.
|
# considerably, since this is by far the most commonly called method.
|
||||||
# @param pattern must be a Regexp, and must be in the form of
|
# @param pattern must be a Regexp, and must be in the form of
|
||||||
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
||||||
# will be returned; the second group is used if the consume flag is
|
# will be returned; the second group is used if the consume flag is
|
||||||
# set.
|
# set.
|
||||||
# @param consume if true, the pattern returned will be consumed, leaving
|
# @param consume if true, the pattern returned will be consumed, leaving
|
||||||
# everything after it in the Source.
|
# everything after it in the Source.
|
||||||
# @return the pattern, if found, or nil if the Source is empty or the
|
# @return the pattern, if found, or nil if the Source is empty or the
|
||||||
# pattern is not found.
|
# pattern is not found.
|
||||||
def scan(pattern, cons=false)
|
def scan(pattern, cons=false)
|
||||||
return nil if @buffer.nil?
|
return nil if @buffer.nil?
|
||||||
rv = @buffer.scan(pattern)
|
rv = @buffer.scan(pattern)
|
||||||
@buffer = $' if cons and rv.size>0
|
@buffer = $' if cons and rv.size>0
|
||||||
rv
|
rv
|
||||||
end
|
end
|
||||||
|
|
||||||
def read
|
def read
|
||||||
end
|
end
|
||||||
|
|
||||||
def consume( pattern )
|
def consume( pattern )
|
||||||
@buffer = $' if pattern.match( @buffer )
|
@buffer = $' if pattern.match( @buffer )
|
||||||
end
|
end
|
||||||
|
|
||||||
def match_to( char, pattern )
|
def match_to( char, pattern )
|
||||||
return pattern.match(@buffer)
|
return pattern.match(@buffer)
|
||||||
end
|
end
|
||||||
|
|
||||||
def match_to_consume( char, pattern )
|
def match_to_consume( char, pattern )
|
||||||
md = pattern.match(@buffer)
|
md = pattern.match(@buffer)
|
||||||
@buffer = $'
|
@buffer = $'
|
||||||
return md
|
return md
|
||||||
end
|
end
|
||||||
|
|
||||||
def match(pattern, cons=false)
|
def match(pattern, cons=false)
|
||||||
md = pattern.match(@buffer)
|
md = pattern.match(@buffer)
|
||||||
@buffer = $' if cons and md
|
@buffer = $' if cons and md
|
||||||
return md
|
return md
|
||||||
end
|
end
|
||||||
|
|
||||||
# @return true if the Source is exhausted
|
# @return true if the Source is exhausted
|
||||||
def empty?
|
def empty?
|
||||||
@buffer == ""
|
@buffer == ""
|
||||||
end
|
end
|
||||||
|
|
||||||
def position
|
def position
|
||||||
@orig.index( @buffer )
|
@orig.index( @buffer )
|
||||||
end
|
end
|
||||||
|
|
||||||
# @return the current line in the source
|
# @return the current line in the source
|
||||||
def current_line
|
def current_line
|
||||||
lines = @orig.split
|
lines = @orig.split
|
||||||
res = lines.grep @buffer[0..30]
|
res = lines.grep @buffer[0..30]
|
||||||
res = res[-1] if res.kind_of? Array
|
res = res[-1] if res.kind_of? Array
|
||||||
lines.index( res ) if res
|
lines.index( res ) if res
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# A Source that wraps an IO. See the Source class for method
|
# A Source that wraps an IO. See the Source class for method
|
||||||
# documentation
|
# documentation
|
||||||
class IOSource < Source
|
class IOSource < Source
|
||||||
#attr_reader :block_size
|
#attr_reader :block_size
|
||||||
|
|
||||||
# block_size has been deprecated
|
# block_size has been deprecated
|
||||||
def initialize(arg, block_size=500, encoding=nil)
|
def initialize(arg, block_size=500, encoding=nil)
|
||||||
@er_source = @source = arg
|
@er_source = @source = arg
|
||||||
@to_utf = false
|
@to_utf = false
|
||||||
# Determining the encoding is a deceptively difficult issue to resolve.
|
# Determining the encoding is a deceptively difficult issue to resolve.
|
||||||
# First, we check the first two bytes for UTF-16. Then we
|
# First, we check the first two bytes for UTF-16. Then we
|
||||||
# assume that the encoding is at least ASCII enough for the '>', and
|
# assume that the encoding is at least ASCII enough for the '>', and
|
||||||
|
@ -147,86 +147,89 @@ module REXML
|
||||||
self.encoding = encoding
|
self.encoding = encoding
|
||||||
elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
|
elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
|
||||||
self.encoding = check_encoding( str )
|
self.encoding = check_encoding( str )
|
||||||
|
elsif (0xef == str[0] && 0xbb == str[1])
|
||||||
|
str += @source.read(1)
|
||||||
|
str = '' if (0xbf == str[2])
|
||||||
else
|
else
|
||||||
@line_break = '>'
|
@line_break = '>'
|
||||||
end
|
end
|
||||||
super str+@source.readline( @line_break )
|
super str+@source.readline( @line_break )
|
||||||
end
|
end
|
||||||
|
|
||||||
def scan(pattern, cons=false)
|
def scan(pattern, cons=false)
|
||||||
rv = super
|
rv = super
|
||||||
# You'll notice that this next section is very similar to the same
|
# You'll notice that this next section is very similar to the same
|
||||||
# section in match(), but just a liiittle different. This is
|
# section in match(), but just a liiittle different. This is
|
||||||
# because it is a touch faster to do it this way with scan()
|
# because it is a touch faster to do it this way with scan()
|
||||||
# than the way match() does it; enough faster to warrant duplicating
|
# than the way match() does it; enough faster to warrant duplicating
|
||||||
# some code
|
# some code
|
||||||
if rv.size == 0
|
if rv.size == 0
|
||||||
until @buffer =~ pattern or @source.nil?
|
until @buffer =~ pattern or @source.nil?
|
||||||
begin
|
begin
|
||||||
# READLINE OPT
|
# READLINE OPT
|
||||||
#str = @source.read(@block_size)
|
#str = @source.read(@block_size)
|
||||||
str = @source.readline(@line_break)
|
str = @source.readline(@line_break)
|
||||||
str = decode(str) if @to_utf and str
|
str = decode(str) if @to_utf and str
|
||||||
@buffer << str
|
@buffer << str
|
||||||
rescue Iconv::IllegalSequence
|
rescue Iconv::IllegalSequence
|
||||||
raise
|
raise
|
||||||
rescue
|
rescue
|
||||||
@source = nil
|
@source = nil
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
rv = super
|
rv = super
|
||||||
end
|
end
|
||||||
rv.taint
|
rv.taint
|
||||||
rv
|
rv
|
||||||
end
|
end
|
||||||
|
|
||||||
def read
|
def read
|
||||||
begin
|
begin
|
||||||
str = @source.readline(@line_break)
|
str = @source.readline(@line_break)
|
||||||
str = decode(str) if @to_utf and str
|
str = decode(str) if @to_utf and str
|
||||||
@buffer << str
|
@buffer << str
|
||||||
rescue Exception, NameError
|
rescue Exception, NameError
|
||||||
@source = nil
|
@source = nil
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def consume( pattern )
|
def consume( pattern )
|
||||||
match( pattern, true )
|
match( pattern, true )
|
||||||
end
|
end
|
||||||
|
|
||||||
def match( pattern, cons=false )
|
def match( pattern, cons=false )
|
||||||
rv = pattern.match(@buffer)
|
rv = pattern.match(@buffer)
|
||||||
@buffer = $' if cons and rv
|
@buffer = $' if cons and rv
|
||||||
while !rv and @source
|
while !rv and @source
|
||||||
begin
|
begin
|
||||||
str = @source.readline(@line_break)
|
str = @source.readline(@line_break)
|
||||||
str = decode(str) if @to_utf and str
|
str = decode(str) if @to_utf and str
|
||||||
@buffer << str
|
@buffer << str
|
||||||
rv = pattern.match(@buffer)
|
rv = pattern.match(@buffer)
|
||||||
@buffer = $' if cons and rv
|
@buffer = $' if cons and rv
|
||||||
rescue
|
rescue
|
||||||
@source = nil
|
@source = nil
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
rv.taint
|
rv.taint
|
||||||
rv
|
rv
|
||||||
end
|
end
|
||||||
|
|
||||||
def empty?
|
def empty?
|
||||||
super and ( @source.nil? || @source.eof? )
|
super and ( @source.nil? || @source.eof? )
|
||||||
end
|
end
|
||||||
|
|
||||||
def position
|
def position
|
||||||
@er_source.stat.pipe? ? 0 : @er_source.pos
|
@er_source.stat.pipe? ? 0 : @er_source.pos
|
||||||
end
|
end
|
||||||
|
|
||||||
# @return the current line in the source
|
# @return the current line in the source
|
||||||
def current_line
|
def current_line
|
||||||
begin
|
begin
|
||||||
pos = @er_source.pos # The byte position in the source
|
pos = @er_source.pos # The byte position in the source
|
||||||
lineno = @er_source.lineno # The XML < position in the source
|
lineno = @er_source.lineno # The XML < position in the source
|
||||||
@er_source.rewind
|
@er_source.rewind
|
||||||
line = 0 # The \r\n position in the source
|
line = 0 # The \r\n position in the source
|
||||||
begin
|
begin
|
||||||
while @er_source.pos < pos
|
while @er_source.pos < pos
|
||||||
@er_source.readline
|
@er_source.readline
|
||||||
|
@ -238,7 +241,7 @@ module REXML
|
||||||
pos = -1
|
pos = -1
|
||||||
line = -1
|
line = -1
|
||||||
end
|
end
|
||||||
[pos, lineno, line]
|
[pos, lineno, line]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -211,16 +211,17 @@ module REXML
|
||||||
return new_string
|
return new_string
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# == DEPRECATED
|
||||||
|
# See REXML::Formatters
|
||||||
|
#
|
||||||
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
||||||
s = to_s()
|
Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
|
||||||
if not (@parent and @parent.whitespace) then
|
formatter = if indent > -1
|
||||||
s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
|
REXML::Formatters::Pretty.new( indent )
|
||||||
if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0
|
else
|
||||||
s = indent_text(s, indent, @parent.context[:indentstyle], false)
|
REXML::Formatters::Default.new
|
||||||
end
|
end
|
||||||
s.squeeze!(" \n\t") if @parent and !@parent.whitespace
|
formatter.write( self, writer )
|
||||||
end
|
|
||||||
writer << s
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# FIXME
|
# FIXME
|
||||||
|
|
|
@ -13,7 +13,7 @@ module REXML
|
||||||
STOP = '\?>';
|
STOP = '\?>';
|
||||||
|
|
||||||
attr_accessor :version, :standalone
|
attr_accessor :version, :standalone
|
||||||
attr_reader :writeencoding
|
attr_reader :writeencoding, :writethis
|
||||||
|
|
||||||
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
|
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
|
||||||
@writethis = true
|
@writethis = true
|
||||||
|
@ -37,9 +37,14 @@ module REXML
|
||||||
XMLDecl.new(self)
|
XMLDecl.new(self)
|
||||||
end
|
end
|
||||||
|
|
||||||
def write writer, indent=-1, transitive=false, ie_hack=false
|
# indent::
|
||||||
|
# Ignored. There must be no whitespace before an XML declaration
|
||||||
|
# transitive::
|
||||||
|
# Ignored
|
||||||
|
# ie_hack::
|
||||||
|
# Ignored
|
||||||
|
def write(writer, indent=-1, transitive=false, ie_hack=false)
|
||||||
return nil unless @writethis or writer.kind_of? Output
|
return nil unless @writethis or writer.kind_of? Output
|
||||||
indent( writer, indent )
|
|
||||||
writer << START.sub(/\\/u, '')
|
writer << START.sub(/\\/u, '')
|
||||||
if writer.kind_of? Output
|
if writer.kind_of? Output
|
||||||
writer << " #{content writer.encoding}"
|
writer << " #{content writer.encoding}"
|
||||||
|
|
|
@ -352,7 +352,8 @@ module REXML
|
||||||
when :following_sibling
|
when :following_sibling
|
||||||
#puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}"
|
#puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}"
|
||||||
results = []
|
results = []
|
||||||
for node in nodeset
|
nodeset.each do |node|
|
||||||
|
next if node.parent.nil?
|
||||||
all_siblings = node.parent.children
|
all_siblings = node.parent.children
|
||||||
current_index = all_siblings.index( node )
|
current_index = all_siblings.index( node )
|
||||||
following_siblings = all_siblings[ current_index+1 .. -1 ]
|
following_siblings = all_siblings[ current_index+1 .. -1 ]
|
||||||
|
@ -363,13 +364,14 @@ module REXML
|
||||||
|
|
||||||
when :preceding_sibling
|
when :preceding_sibling
|
||||||
results = []
|
results = []
|
||||||
for node in nodeset
|
nodeset.each do |node|
|
||||||
|
next if node.parent.nil?
|
||||||
all_siblings = node.parent.children
|
all_siblings = node.parent.children
|
||||||
current_index = all_siblings.index( node )
|
current_index = all_siblings.index( node )
|
||||||
preceding_siblings = all_siblings[ 0 .. current_index-1 ].reverse
|
preceding_siblings = all_siblings[ 0, current_index ].reverse
|
||||||
#results += expr( path_stack.dclone, preceding_siblings )
|
results += preceding_siblings
|
||||||
end
|
end
|
||||||
nodeset = preceding_siblings || []
|
nodeset = results
|
||||||
node_types = ELEMENTS
|
node_types = ELEMENTS
|
||||||
|
|
||||||
when :preceding
|
when :preceding
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue