1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* Added the lower-case Shift-JIS files to the manifest. The upper-case ones
  should be deprecated, but I need a Shift-JIS encoded XML file to test
  against, first.
* Added support for maintaining external entity occurrences in DTDs
* Deprecated the use of Document::DECLARATION.  The new default declaration
  can be gotten with XMLDecl::default()
* Refactored the encoding support code.  It should be more robust now,
  and fixes a few bugs.
* The XPath string() function now deals with Element nodes properly.
* Serialization with Output objects now works as would be expected.
* Various code cleanups, some reducing the number of warnings that Ruby 1.8.x
  produces with REXML.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5144 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ser 2003-12-09 02:41:33 +00:00
parent 31963249b9
commit e6636fe890
21 changed files with 817 additions and 722 deletions

View file

@ -163,6 +163,20 @@ module REXML
end
end
# A child node that holds a piece of source text verbatim so that it can
# be emitted again unchanged.
# NOTE(review): presumably the text is a DTD external entity reference
# such as "%name;" -- confirm against the parser that builds these nodes.
class ExternalEntity < Child
  # src:: the text to remember; it is stored as given, without copying.
  def initialize(src)
    super()
    @entity = src
  end

  # Returns the stored text exactly as it was supplied to the constructor.
  def to_s
    @entity
  end

  # Appends the stored text, then a newline, to +output+ using <<.
  # The two appends are kept as separate statements on purpose: each one
  # goes through +output+ itself rather than through whatever the first
  # << happens to return.
  # +indent+ is accepted but not used.
  def write(output, indent)
    output << @entity
    output << "\n"
  end
end
class NotationDecl < Child
def initialize name, middle, rest
@name = name

View file

@ -19,7 +19,9 @@ module REXML
class Document < Element
# A convenient default XML declaration. If you want an XML declaration,
# the easiest way to add one is mydoc << Document::DECLARATION
DECLARATION = XMLDecl.new( "1.0", "UTF-8" )
# +DEPRECATED+
# Use: mydoc << XMLDecl.default
DECLARATION = XMLDecl.default
# Constructor
# @param source if supplied, must be a Document, String, or IO.
@ -102,30 +104,27 @@ module REXML
# @return the XMLDecl of this document; if no XMLDecl has been
# set, the default declaration is returned.
def xml_decl
rv = @children.find { |item| item.kind_of? XMLDecl }
rv = DECLARATION if rv.nil?
rv
rv = @children[0]
return rv if rv.kind_of? XMLDecl
rv = @children.unshift(XMLDecl.default)[0]
end
# @return the XMLDecl version of this document as a String.
# If no XMLDecl has been set, returns the default version.
def version
decl = xml_decl()
decl.nil? ? XMLDecl.DEFAULT_VERSION : decl.version
xml_decl().version
end
# @return the XMLDecl encoding of this document as a String.
# If no XMLDecl has been set, returns the default encoding.
def encoding
decl = xml_decl()
decl.nil? or decl.encoding.nil? ? XMLDecl.DEFAULT_ENCODING : decl.encoding
xml_decl().encoding
end
# @return the XMLDecl standalone value of this document as a String.
# If no XMLDecl has been set, returns the default setting.
def stand_alone?
decl = xml_decl()
decl.nil? ? XMLDecl.DEFAULT_STANDALONE : decl.stand_alone?
xml_decl().stand_alone?
end
# Write the XML tree out, optionally with indent. This writes out the
@ -154,8 +153,9 @@ module REXML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags. Defaults to false
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8"
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
@children.each { |node|
indent( output, indent) if node.node_type == :element
node.write( output, indent, transitive, ie_hack )
output << "\n" unless indent<0 or node == @children[-1]
}
@ -193,7 +193,7 @@ module REXML
build_context.add(
Text.new( event[1], true, nil, true )
) unless (
event[1].strip.size == 0 and
event[1].strip.size==0 and
build_context.ignore_whitespace_nodes
)
end
@ -218,6 +218,9 @@ module REXML
when :attlistdecl
n = AttlistDecl.new( event[1..-1] )
build_context.add( n )
when :externalentity
n = ExternalEntity.new( event[1] )
build_context.add( n )
when :elementdecl
n = ElementDecl.new( event[1] )
build_context.add(n)

View file

@ -18,20 +18,26 @@ module REXML
if enc and enc != UTF_8
@encoding = enc.upcase
begin
load 'rexml/encodings/ICONV.rb'
load 'rexml/encodings/ICONV.rb'
instance_eval @@__REXML_encoding_methods
Iconv::iconv( UTF_8, @encoding, "" )
rescue LoadError, Exception => err
raise "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
@encoding.untaint
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
begin
load enc_file
load enc_file
instance_eval @@__REXML_encoding_methods
rescue LoadError
puts $!.message
raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
end
end
else
enc = UTF_8
@encoding = enc.upcase
load 'rexml/encodings/UTF-8.rb'
load 'rexml/encodings/UTF-8.rb'
instance_eval @@__REXML_encoding_methods
end
ensure
$VERBOSE = old_verbosity

View file

@ -1,31 +1,36 @@
begin
require 'uconv'
require 'iconv'
module REXML
module Encoding
def decode(str)
return Uconv::euctou8(str)
end
module REXML
module Encoding
@@__REXML_encoding_methods =<<-EOL
def decode(str)
return Iconv::iconv("utf-8", "euc-jp", str)[0]
end
def encode content
return Uconv::u8toeuc(content)
end
end
end
def encode content
return Iconv::iconv("euc-jp", "utf-8", content)[0]
end
EOL
end
end
rescue LoadError
begin
require 'iconv'
module REXML
module Encoding
def decode(str)
return Iconv::iconv("utf-8", "euc-jp", str)[0]
end
require 'uconv'
def encode content
return Iconv::iconv("euc-jp", "utf-8", content)[0]
end
end
end
module REXML
module Encoding
@@__REXML_encoding_methods =<<-EOL
def decode(str)
return Uconv::euctou8(str)
end
def encode content
return Uconv::u8toeuc(content)
end
EOL
end
end
rescue LoadError
raise "uconv or iconv is required for Japanese encoding support."
end

View file

@ -2,13 +2,15 @@ require "iconv"
raise LoadError unless defined? Iconv
module REXML
module Encoding
def decode( str )
return Iconv::iconv(UTF_8, @encoding, str)[0]
end
module Encoding
@@__REXML_encoding_methods =<<-EOL
def decode( str )
return Iconv::iconv("utf-8", @encoding, str)[0]
end
def encode( content )
return Iconv::iconv(@encoding, UTF_8, content)[0]
end
end
def encode( content )
return Iconv::iconv(@encoding, "utf-8", content)[0]
end
EOL
end
end

View file

@ -1,23 +1,25 @@
module REXML
module Encoding
# Convert from UTF-8
def encode content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
if num <= 0xFF
array_enc << num
else
# Numeric entity (&#nnnn;); shard by Stefan Scholl
array_enc.concat "&\##{num};".unpack('C*')
end
end
array_enc.pack('C*')
end
module Encoding
@@__REXML_encoding_methods =<<-EOL
# Convert from UTF-8
def encode content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
if num <= 0xFF
array_enc << num
else
# Numeric entity (&#nnnn;); shard by Stefan Scholl
array_enc.concat "&\##{num};".unpack('C*')
end
end
array_enc.pack('C*')
end
# Convert to UTF-8
def decode(str)
str.unpack('C*').pack('U*')
end
end
# Convert to UTF-8
def decode(str)
str.unpack('C*').pack('U*')
end
EOL
end
end

View file

@ -1,27 +1,29 @@
module REXML
module Encoding
def encode content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
if ((num>>16) > 0)
array_enc << ??
array_enc << 0
else
array_enc << (num & 0xFF)
array_enc << (num >> 8)
end
end
array_enc.pack('C*')
end
module Encoding
@@__REXML_encoding_string =<<-EOL
def encode content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
if ((num>>16) > 0)
array_enc << ??
array_enc << 0
else
array_enc << (num & 0xFF)
array_enc << (num >> 8)
end
end
array_enc.pack('C*')
end
def decode(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(array_enc.size-1, 2){|i|
array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100)
}
array_utf8.pack('U*')
end
end
def decode(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(array_enc.size-1, 2){|i|
array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100)
}
array_utf8.pack('U*')
end
EOL
end
end

View file

@ -1,23 +1,25 @@
module REXML
module Encoding
# Convert from UTF-8
def encode content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
if num <= 0xFF
array_enc << num
else
# Numeric entity (&#nnnn;); shard by Stefan Scholl
array_enc.concat "&\##{num};".unpack('C*')
end
end
array_enc.pack('C*')
end
module Encoding
@@__REXML_encoding_string =<<-EOL
# Convert from UTF-8
def encode content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
if num <= 0xFF
array_enc << num
else
# Numeric entity (&#nnnn;); shard by Stefan Scholl
array_enc.concat "&\##{num};".unpack('C*')
end
end
array_enc.pack('C*')
end
# Convert to UTF-8
def decode(str)
str.unpack('C*').pack('U*')
end
end
# Convert to UTF-8
def decode(str)
str.unpack('C*').pack('U*')
end
EOL
end
end

View file

@ -1,27 +1,29 @@
module REXML
module Encoding
def encode content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
if ((num>>16) > 0)
array_enc << 0
array_enc << ??
else
array_enc << (num >> 8)
array_enc << (num & 0xFF)
end
end
array_enc.pack('C*')
end
module Encoding
@@__REXML_encoding_string =<<-EOL
def encode content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
if ((num>>16) > 0)
array_enc << 0
array_enc << ??
else
array_enc << (num >> 8)
array_enc << (num & 0xFF)
end
end
array_enc.pack('C*')
end
def decode(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(arrayEnc.size-1, 2){|i|
array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100)
}
array_utf8.pack('U*')
end
end
def decode(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(array_enc.size-1, 2){|i|
array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100)
}
array_utf8.pack('U*')
end
EOL
end
end

View file

@ -1,11 +1,13 @@
module REXML
module Encoding
def encode content
content
end
module Encoding
@@__REXML_encoding_methods =<<-EOL
def encode content
content
end
def decode(str)
str
end
end
def decode(str)
str
end
EOL
end
end

View file

@ -27,7 +27,13 @@ module REXML
def Functions::namespace_context; @@namespace_context; end
def Functions::text( )
return true if @@node.node_type == :text
if @@node.node_type == :element
return @@node.text
elsif @@node.node_type == :text
return @@node.value
else
return false
end
end
def Functions::last( )

View file

@ -135,8 +135,8 @@ module REXML
end
def text=( foo )
replace = at(4).kind_of?(String) ? 1 : 0
_old_put(4, replace, normalizefoo)
replace = at(4).kind_of?(String)? 1 : 0
self._old_put(4,replace, normalizefoo)
end
def root

View file

@ -3,7 +3,9 @@ require 'rexml/encoding'
module REXML
class Output
include Encoding
attr_reader :encoding
attr_reader :encoding
def initialize real_IO, encd="iso-8859-1"
@output = real_IO
self.encoding = encd
@ -12,7 +14,11 @@ module REXML
end
def <<( content )
@output << (@to_utf ? encode(content) : content)
@output << (@to_utf ? self.encode(content) : content)
end
# Short human-readable tag for this object: the literal text "Output["
# followed by the current encoding and a closing "]".
def to_s
  label = encoding.to_s
  "Output[" + label + "]"
end
end
end

View file

@ -29,8 +29,6 @@ module REXML
err << "Position: #{position}\n"
err << "Last 80 unconsumed characters:\n"
err << @source.buffer[0..80].gsub(/\n/, ' ')
err << "\n"
err << @source.buffer[0..80].unpack("U*").inspect
end
err

View file

@ -56,6 +56,7 @@ module REXML
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
ELEMENTDECL_START = /^\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
@ -214,8 +215,13 @@ module REXML
if @document_status == :in_doctype
md = @source.match(/\s*(.*?>)/um)
case md[1]
when SYSTEMENTITY
match = @source.match( SYSTEMENTITY, true )[1]
return [ :externalentity, match ]
when ELEMENTDECL_START
return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
when ENTITY_START
match = @source.match( ENTITYDECL, true ).to_a.compact
match[0] = :entitydecl

View file

@ -1,3 +1,7 @@
require 'rexml/parsers/baseparser'
require 'rexml/parseexception'
require 'rexml/namespace'
module REXML
module Parsers
class SAX2Parser
@ -85,7 +89,7 @@ module REXML
if procs or listeners
# break out the namespace declarations
# The attributes live in event[2]
nsdecl = event[2].find_all { |n, value| n =~ /^xmlns:/ }
nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
nsdecl.collect! { |n, value| [ n[6..-1], value ] }
@namespace_stack.push({})
nsdecl.each do |n,v|
@ -194,10 +198,9 @@ module REXML
end
def get_namespace( prefix )
uri = @namespace_stack.find do |ns|
not ns[prefix].nil?
end
uri[prefix] unless uri.nil?
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
(@namespace_stack.find { |ns| not ns[nil].nil? })
uris[-1][prefix] unless uris.nil? or 0 == uris.size
end
end
end

File diff suppressed because it is too large Load diff

View file

@ -2,8 +2,8 @@
#
# URL: http://www.germane-software.com/software/rexml
# Author: Sean Russell <ser@germane-software.com>
# Version: 2.5.6
# Date: +2003/054
# Version: 2.7.2
# Date: +2003/343
@ -21,6 +21,6 @@
# A tutorial is available in docs/tutorial.html
module REXML
Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
Date = "+2003/283"
Date = "+2003/343"
Version = "2.7.2"
end

View file

@ -28,7 +28,7 @@ module REXML
# Constructor
# @param arg must be a String, and should be a valid XML document
def initialize arg
def initialize(arg)
@orig = @buffer = arg
self.encoding = check_encoding( @buffer )
#@buffer = decode(@buffer) unless @encoding == UTF_8
@ -64,10 +64,10 @@ module REXML
# everything after it in the Source.
# @return the pattern, if found, or nil if the Source is empty or the
# pattern is not found.
def scan pattern, consume=false
def scan(pattern, cons=false)
return nil if @buffer.nil?
rv = @buffer.scan(pattern)
@buffer = $' if consume and rv.size>0
@buffer = $' if cons and rv.size>0
rv
end
@ -88,21 +88,21 @@ module REXML
return md
end
def match pattern, consume=false
def match(pattern, cons=false)
md = pattern.match(@buffer)
@buffer = $' if consume and md
@buffer = $' if cons and md
return md
end
# @return true if the Source is exhausted
def empty?
@buffer.nil? or @buffer.strip.nil?
@buffer.nil?
end
# @return the current line in the source
def current_line
lines = @orig.split
res = lines.grep(@buffer[0..30])
res = lines.grep @buffer[0..30]
res = res[-1] if res.kind_of? Array
lines.index( res ) if res
end
@ -113,7 +113,7 @@ module REXML
class IOSource < Source
#attr_reader :block_size
def initialize arg, block_size=500
def initialize(arg, block_size=500)
@er_source = @source = arg
@to_utf = false
# READLINE OPT
@ -127,7 +127,7 @@ module REXML
@line_break = encode( '>' )
end
def scan pattern, consume=false
def scan(pattern, cons=false)
rv = super
# You'll notice that this next section is very similar to the same
# section in match(), but just a liiittle different. This is
@ -166,16 +166,16 @@ module REXML
match( pattern, true )
end
def match pattern, consume=false
def match( pattern, cons=false )
rv = pattern.match(@buffer)
@buffer = $' if consume and rv
@buffer = $' if cons and rv
while !rv and @source
begin
str = @source.readline('>')
str = decode(str) if @to_utf and str
@buffer << str
rv = pattern.match(@buffer)
@buffer = $' if consume and rv
@buffer = $' if cons and rv
rescue
@source = nil
end

View file

@ -245,7 +245,7 @@ module REXML
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
rv = string.clone
rv.gsub!( /\r\n?/, "\n" )
matches = rv.scan(REFERENCE)
matches = rv.scan( REFERENCE )
return rv if matches.size == 0
rv.gsub!( NUMERICENTITY ) {|m|
m=$1

View file

@ -13,13 +13,16 @@ module REXML
STOP = '\?>';
attr_accessor :version, :standalone
attr_reader :writeencoding
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
@encoding_set = !encoding.nil?
@writethis = true
@writeencoding = !encoding.nil?
if version.kind_of? XMLDecl
super()
@version = version.version
self.encoding = version.encoding
@writeencoding = version.writeencoding
@standalone = version.standalone
else
super()
@ -35,9 +38,14 @@ module REXML
end
def write writer, indent=-1, transitive=false, ie_hack=false
return "" unless @writethis or writer.kind_of? Output
indent( writer, indent )
writer << START.sub(/\\/u, '')
writer << " #{content}"
if writer.kind_of? Output
writer << " #{content writer.encoding}"
else
writer << " #{content encoding}"
end
writer << STOP.sub(/\\/u, '')
end
@ -50,7 +58,6 @@ module REXML
def xmldecl version, encoding, standalone
@version = version
@encoding_set = !encoding.nil?
self.encoding = encoding
@standalone = standalone
end
@ -60,11 +67,37 @@ module REXML
end
alias :stand_alone? :standalone
alias :old_enc= :encoding=
# Sets the encoding and records whether it was given explicitly.
#
# enc:: the encoding name, or nil. A nil value is stored as "UTF-8" and
#       clears @writeencoding; any other value is stored as given and
#       sets @writeencoding.
#
# The underlying setter (old_enc=) is invoked before the flag is updated,
# and dowrite is called last in every case.
def encoding=( enc )
  explicit = !enc.nil?
  self.old_enc = explicit ? enc : "UTF-8"
  @writeencoding = explicit
  self.dowrite
end
# Builds a fresh version "1.0" declaration with no explicit encoding
# and calls nowrite on it before returning it.
# Every call returns a new instance.
def XMLDecl.default
  decl = XMLDecl.new( "1.0" )
  decl.nowrite
  decl
end
# Clears the @writethis flag (sets it to false).
# NOTE(review): the flag appears to control whether this declaration is
# emitted when written to a plain (non-Output) writer -- confirm in #write.
def nowrite
@writethis = false
end
# Sets the @writethis flag to true, the inverse of nowrite.
def dowrite
@writethis = true
end
private
def content
def content(enc)
rv = "version='#@version'"
rv << " encoding='#{encoding}'" if @encoding_set
rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
rv << " standalone='#@standalone'" if @standalone
rv
end