1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* Added the lower-case Shift-JIS files to the manifest. The upper-case ones
  should be deprecated, but I need a Shift-JIS encoded XML file to test
  against, first.
* Added support for maintaining external entity occurrences in DTDs
* Deprecated the use of Document::DECLARATION.  The new default declaration
  can be gotten with XMLDecl::default()
* Refactored the encoding support code.  It should be more robust now,
  and fixes a few bugs.
* The XPath string() function now deals with Element nodes properly.
* Serialization with Output objects now works as would be expected.
* Various code cleanups, some reducing the number of warnings that Ruby 1.8.x
  produces with REXML.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5144 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ser 2003-12-09 02:41:33 +00:00
parent 31963249b9
commit e6636fe890
21 changed files with 817 additions and 722 deletions

View file

@ -163,6 +163,20 @@ module REXML
end end
end end
# A DTD child node that holds the text of an external entity reference
# and reproduces it verbatim when written out.
class ExternalEntity < Child
  # Constructor
  # @param src the text of the entity reference; it is stored unchanged
  def initialize( src )
    super()
    @entity = src
  end

  # @return the stored entity text, exactly as it was supplied
  def to_s
    @entity
  end

  # Writes the stored entity text followed by a newline.
  # @param output any object responding to <<
  # @param indent accepted for compatibility with other nodes; unused
  # @param transitive accepted for compatibility with other nodes; unused
  # @param ie_hack accepted for compatibility with other nodes; unused
  def write( output, indent=-1, transitive=false, ie_hack=false )
    # Two separate appends on purpose: << is not guaranteed to return
    # the receiver, so the calls must not be chained.
    output << @entity
    output << "\n"
  end
end
class NotationDecl < Child class NotationDecl < Child
def initialize name, middle, rest def initialize name, middle, rest
@name = name @name = name

View file

@ -19,7 +19,9 @@ module REXML
class Document < Element class Document < Element
# A convenient default XML declaration. If you want an XML declaration, # A convenient default XML declaration. If you want an XML declaration,
# the easiest way to add one is mydoc << Document::DECLARATION # the easiest way to add one is mydoc << Document::DECLARATION
DECLARATION = XMLDecl.new( "1.0", "UTF-8" ) # +DEPRECATED+
# Use: mydoc << XMLDecl.default
DECLARATION = XMLDecl.default
# Constructor # Constructor
# @param source if supplied, must be a Document, String, or IO. # @param source if supplied, must be a Document, String, or IO.
@ -102,30 +104,27 @@ module REXML
# @return the XMLDecl of this document; if no XMLDecl has been # @return the XMLDecl of this document; if no XMLDecl has been
# set, the default declaration is returned. # set, the default declaration is returned.
def xml_decl def xml_decl
rv = @children.find { |item| item.kind_of? XMLDecl } rv = @children[0]
rv = DECLARATION if rv.nil? return rv if rv.kind_of? XMLDecl
rv rv = @children.unshift(XMLDecl.default)[0]
end end
# @return the XMLDecl version of this document as a String. # @return the XMLDecl version of this document as a String.
# If no XMLDecl has been set, returns the default version. # If no XMLDecl has been set, returns the default version.
def version def version
decl = xml_decl() xml_decl().version
decl.nil? ? XMLDecl.DEFAULT_VERSION : decl.version
end end
# @return the XMLDecl encoding of this document as a String. # @return the XMLDecl encoding of this document as a String.
# If no XMLDecl has been set, returns the default encoding. # If no XMLDecl has been set, returns the default encoding.
def encoding def encoding
decl = xml_decl() xml_decl().encoding
decl.nil? or decl.encoding.nil? ? XMLDecl.DEFAULT_ENCODING : decl.encoding
end end
# @return the XMLDecl standalone value of this document as a String. # @return the XMLDecl standalone value of this document as a String.
# If no XMLDecl has been set, returns the default setting. # If no XMLDecl has been set, returns the default setting.
def stand_alone? def stand_alone?
decl = xml_decl() xml_decl().stand_alone?
decl.nil? ? XMLDecl.DEFAULT_STANDALONE : decl.stand_alone?
end end
# Write the XML tree out, optionally with indent. This writes out the # Write the XML tree out, optionally with indent. This writes out the
@ -154,8 +153,9 @@ module REXML
# that IE's limited abilities can handle. This hack inserts a space # that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags. Defaults to false # before the /> on empty tags. Defaults to false
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
@children.each { |node| @children.each { |node|
indent( output, indent) if node.node_type == :element
node.write( output, indent, transitive, ie_hack ) node.write( output, indent, transitive, ie_hack )
output << "\n" unless indent<0 or node == @children[-1] output << "\n" unless indent<0 or node == @children[-1]
} }
@ -218,6 +218,9 @@ module REXML
when :attlistdecl when :attlistdecl
n = AttlistDecl.new( event[1..-1] ) n = AttlistDecl.new( event[1..-1] )
build_context.add( n ) build_context.add( n )
when :externalentity
n = ExternalEntity.new( event[1] )
build_context.add( n )
when :elementdecl when :elementdecl
n = ElementDecl.new( event[1] ) n = ElementDecl.new( event[1] )
build_context.add(n) build_context.add(n)

View file

@ -19,12 +19,17 @@ module REXML
@encoding = enc.upcase @encoding = enc.upcase
begin begin
load 'rexml/encodings/ICONV.rb' load 'rexml/encodings/ICONV.rb'
instance_eval @@__REXML_encoding_methods
Iconv::iconv( UTF_8, @encoding, "" ) Iconv::iconv( UTF_8, @encoding, "" )
rescue LoadError, Exception => err rescue LoadError, Exception => err
raise "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
@encoding.untaint
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" ) enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
begin begin
load enc_file load enc_file
instance_eval @@__REXML_encoding_methods
rescue LoadError rescue LoadError
puts $!.message
raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." ) raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
end end
end end
@ -32,6 +37,7 @@ module REXML
enc = UTF_8 enc = UTF_8
@encoding = enc.upcase @encoding = enc.upcase
load 'rexml/encodings/UTF-8.rb' load 'rexml/encodings/UTF-8.rb'
instance_eval @@__REXML_encoding_methods
end end
ensure ensure
$VERBOSE = old_verbosity $VERBOSE = old_verbosity

View file

@ -1,22 +1,9 @@
begin
require 'uconv'
module REXML
module Encoding
def decode(str)
return Uconv::euctou8(str)
end
def encode content
return Uconv::u8toeuc(content)
end
end
end
rescue LoadError
begin begin
require 'iconv' require 'iconv'
module REXML module REXML
module Encoding module Encoding
@@__REXML_encoding_methods =<<-EOL
def decode(str) def decode(str)
return Iconv::iconv("utf-8", "euc-jp", str)[0] return Iconv::iconv("utf-8", "euc-jp", str)[0]
end end
@ -24,6 +11,24 @@ rescue LoadError
def encode content def encode content
return Iconv::iconv("euc-jp", "utf-8", content)[0] return Iconv::iconv("euc-jp", "utf-8", content)[0]
end end
EOL
end
end
rescue LoadError
begin
require 'uconv'
module REXML
module Encoding
@@__REXML_encoding_methods =<<-EOL
def decode(str)
return Uconv::euctou8(str)
end
def encode content
return Uconv::u8toeuc(content)
end
EOL
end end
end end
rescue LoadError rescue LoadError

View file

@ -3,12 +3,14 @@ raise LoadError unless defined? Iconv
module REXML module REXML
module Encoding module Encoding
@@__REXML_encoding_methods =<<-EOL
def decode( str ) def decode( str )
return Iconv::iconv(UTF_8, @encoding, str)[0] return Iconv::iconv("utf-8", @encoding, str)[0]
end end
def encode( content ) def encode( content )
return Iconv::iconv(@encoding, UTF_8, content)[0] return Iconv::iconv(@encoding, "utf-8", content)[0]
end end
EOL
end end
end end

View file

@ -1,5 +1,6 @@
module REXML module REXML
module Encoding module Encoding
@@__REXML_encoding_methods =<<-EOL
# Convert from UTF-8 # Convert from UTF-8
def encode content def encode content
array_utf8 = content.unpack('U*') array_utf8 = content.unpack('U*')
@ -19,5 +20,6 @@ module REXML
def decode(str) def decode(str)
str.unpack('C*').pack('U*') str.unpack('C*').pack('U*')
end end
EOL
end end
end end

View file

@ -1,5 +1,6 @@
module REXML module REXML
module Encoding module Encoding
@@__REXML_encoding_string =<<-EOL
def encode content def encode content
array_utf8 = content.unpack("U*") array_utf8 = content.unpack("U*")
array_enc = [] array_enc = []
@ -23,5 +24,6 @@ module REXML
} }
array_utf8.pack('U*') array_utf8.pack('U*')
end end
EOL
end end
end end

View file

@ -1,5 +1,6 @@
module REXML module REXML
module Encoding module Encoding
@@__REXML_encoding_string =<<-EOL
# Convert from UTF-8 # Convert from UTF-8
def encode content def encode content
array_utf8 = content.unpack('U*') array_utf8 = content.unpack('U*')
@ -19,5 +20,6 @@ module REXML
def decode(str) def decode(str)
str.unpack('C*').pack('U*') str.unpack('C*').pack('U*')
end end
EOL
end end
end end

View file

@ -1,5 +1,6 @@
module REXML module REXML
module Encoding module Encoding
@@__REXML_encoding_string =<<-EOL
def encode content def encode content
array_utf8 = content.unpack("U*") array_utf8 = content.unpack("U*")
array_enc = [] array_enc = []
@ -18,10 +19,11 @@ module REXML
def decode(str) def decode(str)
array_enc=str.unpack('C*') array_enc=str.unpack('C*')
array_utf8 = [] array_utf8 = []
2.step(arrayEnc.size-1, 2){|i| 2.step(array_enc.size-1, 2){|i|
array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100) array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100)
} }
array_utf8.pack('U*') array_utf8.pack('U*')
end end
EOL
end end
end end

View file

@ -1,5 +1,6 @@
module REXML module REXML
module Encoding module Encoding
@@__REXML_encoding_methods =<<-EOL
def encode content def encode content
content content
end end
@ -7,5 +8,6 @@ module REXML
def decode(str) def decode(str)
str str
end end
EOL
end end
end end

View file

@ -27,7 +27,13 @@ module REXML
def Functions::namespace_context; @@namespace_context; end def Functions::namespace_context; @@namespace_context; end
def Functions::text( ) def Functions::text( )
return true if @@node.node_type == :text if @@node.node_type == :element
return @@node.text
elsif @@node.node_type == :text
return @@node.value
else
return false
end
end end
def Functions::last( ) def Functions::last( )

View file

@ -136,7 +136,7 @@ module REXML
def text=( foo ) def text=( foo )
replace = at(4).kind_of?(String)? 1 : 0 replace = at(4).kind_of?(String)? 1 : 0
_old_put(4, replace, normalizefoo) self._old_put(4,replace, normalizefoo)
end end
def root def root

View file

@ -3,7 +3,9 @@ require 'rexml/encoding'
module REXML module REXML
class Output class Output
include Encoding include Encoding
attr_reader :encoding attr_reader :encoding
def initialize real_IO, encd="iso-8859-1" def initialize real_IO, encd="iso-8859-1"
@output = real_IO @output = real_IO
self.encoding = encd self.encoding = encd
@ -12,7 +14,11 @@ module REXML
end end
def <<( content ) def <<( content )
@output << (@to_utf ? encode(content) : content) @output << (@to_utf ? self.encode(content) : content)
end
def to_s
"Output[#{encoding}]"
end end
end end
end end

View file

@ -29,8 +29,6 @@ module REXML
err << "Position: #{position}\n" err << "Position: #{position}\n"
err << "Last 80 unconsumed characters:\n" err << "Last 80 unconsumed characters:\n"
err << @source.buffer[0..80].gsub(/\n/, ' ') err << @source.buffer[0..80].gsub(/\n/, ' ')
err << "\n"
err << @source.buffer[0..80].unpack("U*").inspect
end end
err err

View file

@ -56,6 +56,7 @@ module REXML
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
ELEMENTDECL_START = /^\s*<!ELEMENT/um ELEMENTDECL_START = /^\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)" ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)" NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))" ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
@ -214,8 +215,13 @@ module REXML
if @document_status == :in_doctype if @document_status == :in_doctype
md = @source.match(/\s*(.*?>)/um) md = @source.match(/\s*(.*?>)/um)
case md[1] case md[1]
when SYSTEMENTITY
match = @source.match( SYSTEMENTITY, true )[1]
return [ :externalentity, match ]
when ELEMENTDECL_START when ELEMENTDECL_START
return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ] return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
when ENTITY_START when ENTITY_START
match = @source.match( ENTITYDECL, true ).to_a.compact match = @source.match( ENTITYDECL, true ).to_a.compact
match[0] = :entitydecl match[0] = :entitydecl

View file

@ -1,3 +1,7 @@
require 'rexml/parsers/baseparser'
require 'rexml/parseexception'
require 'rexml/namespace'
module REXML module REXML
module Parsers module Parsers
class SAX2Parser class SAX2Parser
@ -85,7 +89,7 @@ module REXML
if procs or listeners if procs or listeners
# break out the namespace declarations # break out the namespace declarations
# The attributes live in event[2] # The attributes live in event[2]
nsdecl = event[2].find_all { |n, value| n =~ /^xmlns:/ } nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
nsdecl.collect! { |n, value| [ n[6..-1], value ] } nsdecl.collect! { |n, value| [ n[6..-1], value ] }
@namespace_stack.push({}) @namespace_stack.push({})
nsdecl.each do |n,v| nsdecl.each do |n,v|
@ -194,10 +198,9 @@ module REXML
end end
def get_namespace( prefix ) def get_namespace( prefix )
uri = @namespace_stack.find do |ns| uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
not ns[prefix].nil? (@namespace_stack.find { |ns| not ns[nil].nil? })
end uris[-1][prefix] unless uris.nil? or 0 == uris.size
uri[prefix] unless uri.nil?
end end
end end
end end

View file

@ -507,9 +507,11 @@ module REXML
parsed << varname parsed << varname
#arry << @variables[ varname ] #arry << @variables[ varname ]
when /^(\w[-\w]*)(?:\()/ when /^(\w[-\w]*)(?:\()/
#puts "PrimaryExpr :: Function >>> #$1 -- '#$''"
fname = $1 fname = $1
path = $' path = $'
return nil if fname =~ NT #puts "#{fname} =~ #{NT.inspect}"
#return nil if fname =~ NT
parsed << :function parsed << :function
parsed << fname parsed << fname
path = FunctionCall(path, parsed) path = FunctionCall(path, parsed)
@ -532,6 +534,7 @@ module REXML
#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')' #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
def FunctionCall rest, parsed def FunctionCall rest, parsed
path, arguments = parse_args(rest) path, arguments = parse_args(rest)
#puts "Function call >>> (#{arguments.inspect})"
argset = [] argset = []
for argument in arguments for argument in arguments
args = [] args = []

View file

@ -2,8 +2,8 @@
# #
# URL: http://www.germane-software.com/software/rexml # URL: http://www.germane-software.com/software/rexml
# Author: Sean Russell <ser@germane-software.com> # Author: Sean Russell <ser@germane-software.com>
# Version: 2.5.6 # Version: 2.7.2
# Date: +2003/054 # Date: +2003/343
@ -21,6 +21,6 @@
# A tutorial is available in docs/tutorial.html # A tutorial is available in docs/tutorial.html
module REXML module REXML
Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>" Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
Date = "+2003/283" Date = "+2003/343"
Version = "2.7.2" Version = "2.7.2"
end end

View file

@ -28,7 +28,7 @@ module REXML
# Constructor # Constructor
# @param arg must be a String, and should be a valid XML document # @param arg must be a String, and should be a valid XML document
def initialize arg def initialize(arg)
@orig = @buffer = arg @orig = @buffer = arg
self.encoding = check_encoding( @buffer ) self.encoding = check_encoding( @buffer )
#@buffer = decode(@buffer) unless @encoding == UTF_8 #@buffer = decode(@buffer) unless @encoding == UTF_8
@ -64,10 +64,10 @@ module REXML
# everything after it in the Source. # everything after it in the Source.
# @return the pattern, if found, or nil if the Source is empty or the # @return the pattern, if found, or nil if the Source is empty or the
# pattern is not found. # pattern is not found.
def scan pattern, consume=false def scan(pattern, cons=false)
return nil if @buffer.nil? return nil if @buffer.nil?
rv = @buffer.scan(pattern) rv = @buffer.scan(pattern)
@buffer = $' if consume and rv.size>0 @buffer = $' if cons and rv.size>0
rv rv
end end
@ -88,21 +88,21 @@ module REXML
return md return md
end end
def match pattern, consume=false def match(pattern, cons=false)
md = pattern.match(@buffer) md = pattern.match(@buffer)
@buffer = $' if consume and md @buffer = $' if cons and md
return md return md
end end
# @return true if the Source is exhausted # @return true if the Source is exhausted
def empty? def empty?
@buffer.nil? or @buffer.strip.nil? @buffer.nil?
end end
# @return the current line in the source # @return the current line in the source
def current_line def current_line
lines = @orig.split lines = @orig.split
res = lines.grep(@buffer[0..30]) res = lines.grep @buffer[0..30]
res = res[-1] if res.kind_of? Array res = res[-1] if res.kind_of? Array
lines.index( res ) if res lines.index( res ) if res
end end
@ -113,7 +113,7 @@ module REXML
class IOSource < Source class IOSource < Source
#attr_reader :block_size #attr_reader :block_size
def initialize arg, block_size=500 def initialize(arg, block_size=500)
@er_source = @source = arg @er_source = @source = arg
@to_utf = false @to_utf = false
# READLINE OPT # READLINE OPT
@ -127,7 +127,7 @@ module REXML
@line_break = encode( '>' ) @line_break = encode( '>' )
end end
def scan pattern, consume=false def scan(pattern, cons=false)
rv = super rv = super
# You'll notice that this next section is very similar to the same # You'll notice that this next section is very similar to the same
# section in match(), but just a liiittle different. This is # section in match(), but just a liiittle different. This is
@ -166,16 +166,16 @@ module REXML
match( pattern, true ) match( pattern, true )
end end
def match pattern, consume=false def match( pattern, cons=false )
rv = pattern.match(@buffer) rv = pattern.match(@buffer)
@buffer = $' if consume and rv @buffer = $' if cons and rv
while !rv and @source while !rv and @source
begin begin
str = @source.readline('>') str = @source.readline('>')
str = decode(str) if @to_utf and str str = decode(str) if @to_utf and str
@buffer << str @buffer << str
rv = pattern.match(@buffer) rv = pattern.match(@buffer)
@buffer = $' if consume and rv @buffer = $' if cons and rv
rescue rescue
@source = nil @source = nil
end end

View file

@ -13,13 +13,16 @@ module REXML
STOP = '\?>'; STOP = '\?>';
attr_accessor :version, :standalone attr_accessor :version, :standalone
attr_reader :writeencoding
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil) def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
@encoding_set = !encoding.nil? @writethis = true
@writeencoding = !encoding.nil?
if version.kind_of? XMLDecl if version.kind_of? XMLDecl
super() super()
@version = version.version @version = version.version
self.encoding = version.encoding self.encoding = version.encoding
@writeencoding = version.writeencoding
@standalone = version.standalone @standalone = version.standalone
else else
super() super()
@ -35,9 +38,14 @@ module REXML
end end
def write writer, indent=-1, transitive=false, ie_hack=false def write writer, indent=-1, transitive=false, ie_hack=false
return "" unless @writethis or writer.kind_of? Output
indent( writer, indent ) indent( writer, indent )
writer << START.sub(/\\/u, '') writer << START.sub(/\\/u, '')
writer << " #{content}" if writer.kind_of? Output
writer << " #{content writer.encoding}"
else
writer << " #{content encoding}"
end
writer << STOP.sub(/\\/u, '') writer << STOP.sub(/\\/u, '')
end end
@ -50,7 +58,6 @@ module REXML
def xmldecl version, encoding, standalone def xmldecl version, encoding, standalone
@version = version @version = version
@encoding_set = !encoding.nil?
self.encoding = encoding self.encoding = encoding
@standalone = standalone @standalone = standalone
end end
@ -60,11 +67,37 @@ module REXML
end end
alias :stand_alone? :standalone alias :stand_alone? :standalone
alias :old_enc= :encoding=
# Sets the encoding of this declaration.
# @param enc the encoding name, or nil. A nil value stores "UTF-8"
# and clears the write-encoding flag; any other value is stored as
# given and sets the flag. In both cases the declaration is marked
# as writable again afterwards.
def encoding=( enc )
  explicit = !enc.nil?
  # old_enc= is the setter this method replaces; it runs before the
  # flag is touched so a failure there leaves the flag unchanged.
  self.old_enc = explicit ? enc : "UTF-8"
  @writeencoding = explicit
  self.dowrite
end
# Builds a fresh default declaration.
# @return a new XMLDecl for version "1.0" on which nowrite has been
# called, so it is flagged as not to be written
def XMLDecl.default
  decl = XMLDecl.new( "1.0" )
  decl.nowrite
  decl
end
# Clears the @writethis flag, marking this declaration as one that
# should not be written out.
def nowrite
@writethis = false
end
# Sets the @writethis flag, marking this declaration as one that
# should be written out.
def dowrite
@writethis = true
end
private private
def content def content(enc)
rv = "version='#@version'" rv = "version='#@version'"
rv << " encoding='#{encoding}'" if @encoding_set rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
rv << " standalone='#@standalone'" if @standalone rv << " standalone='#@standalone'" if @standalone
rv rv
end end