mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* Changes to the encoding mechanism. If iconv is found, it is used first
  for encoding changes. This should be the case on all 1.8 installations.
  When it isn't found (<1.6), the native REXML encoding mechanism is used.
  This cleaned out some files and tightened up the code a bit, and iconv
  should be faster than the pure Ruby code.
* Changed deprecated assert_not_nil to assert throughout the tests.
* Parse exceptions are a little more verbose, and extend RuntimeError.
* Bug fixes to XPathParser.
* The Light API is still shifting, like the sands of the desert.
* Fixed a new Ruby 1.8.0 warning, added some speed optimizations, and
  tightened error reporting in the base parser.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
662532be00
commit
7d21c237cc
23 changed files with 185 additions and 224 deletions
|
@ -2,61 +2,49 @@ module REXML
|
||||||
module Encoding
|
module Encoding
|
||||||
@@uconv_available = false
|
@@uconv_available = false
|
||||||
|
|
||||||
ENCODING_CLAIMS = { }
|
|
||||||
|
|
||||||
def Encoding.claim( encoding_str, match=nil )
|
|
||||||
if match
|
|
||||||
ENCODING_CLAIMS[ match ] = encoding_str
|
|
||||||
else
|
|
||||||
ENCODING_CLAIMS[ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])#{encoding_str}\2/i ] = encoding_str
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Native, default format is UTF-8, so it is declared here rather than in
|
# Native, default format is UTF-8, so it is declared here rather than in
|
||||||
# an encodings/ definition.
|
# an encodings/ definition.
|
||||||
UTF_8 = 'UTF-8'
|
UTF_8 = 'UTF-8'
|
||||||
claim( UTF_8 )
|
UTF_16 = 'UTF-16'
|
||||||
|
UNILE = 'UNILE'
|
||||||
|
|
||||||
# ID ---> Encoding name
|
# ID ---> Encoding name
|
||||||
attr_reader :encoding
|
attr_reader :encoding
|
||||||
def encoding=( enc )
|
def encoding=( enc )
|
||||||
enc = UTF_8 unless enc
|
old_verbosity = $VERBOSE
|
||||||
@encoding = enc.upcase
|
begin
|
||||||
require "rexml/encodings/#@encoding" unless @encoding == UTF_8
|
$VERBOSE = false
|
||||||
|
return if defined? @encoding and enc == @encoding
|
||||||
|
if enc and enc != UTF_8
|
||||||
|
@encoding = enc.upcase
|
||||||
|
begin
|
||||||
|
load 'rexml/encodings/ICONV.rb'
|
||||||
|
Iconv::iconv( UTF_8, @encoding, "" )
|
||||||
|
rescue LoadError, Exception => err
|
||||||
|
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
|
||||||
|
begin
|
||||||
|
load enc_file
|
||||||
|
rescue LoadError
|
||||||
|
raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
|
||||||
|
end
|
||||||
|
end
|
||||||
|
else
|
||||||
|
enc = UTF_8
|
||||||
|
@encoding = enc.upcase
|
||||||
|
load 'rexml/encodings/UTF-8.rb'
|
||||||
|
end
|
||||||
|
ensure
|
||||||
|
$VERBOSE = old_verbosity
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def check_encoding str
|
def check_encoding str
|
||||||
rv = ENCODING_CLAIMS.find{|k,v| str =~ k }
|
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
|
||||||
# Raise an exception if there is a declared encoding and we don't
|
return UTF_16 if str[0] == 254 && str[1] == 255
|
||||||
# recognize it
|
return UNILE if str[0] == 255 && str[1] == 254
|
||||||
unless rv
|
str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
|
||||||
if str =~ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])(.*?)\2/
|
return $1.upcase if $1
|
||||||
raise "A matching encoding handler was not found for encoding '#{$3}', or the encoding handler failed to load due to a missing support library (such as uconv)."
|
return UTF_8
|
||||||
else
|
|
||||||
return UTF_8
|
|
||||||
end
|
|
||||||
end
|
|
||||||
return rv[1]
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_utf_8(str)
|
|
||||||
return str
|
|
||||||
end
|
|
||||||
|
|
||||||
def from_utf_8 content
|
|
||||||
return content
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
module Encodingses
|
|
||||||
encodings = []
|
|
||||||
$:.each do |incl_dir|
|
|
||||||
if Dir[ File.join(incl_dir, 'rexml', 'encodings') ].size > 0
|
|
||||||
encodings |= Dir[ File.join(incl_dir, 'rexml', 'encodings', '*_decl.rb') ]
|
|
||||||
end
|
|
||||||
encodings.collect!{ |f| File.basename(f) }
|
|
||||||
encodings.uniq!
|
|
||||||
end
|
|
||||||
encodings.each { |enc| require "rexml/encodings/#{enc}" }
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,30 +3,30 @@ begin
|
||||||
|
|
||||||
module REXML
|
module REXML
|
||||||
module Encoding
|
module Encoding
|
||||||
def from_euc_jp(str)
|
def decode(str)
|
||||||
return Uconv::euctou8(str)
|
return Uconv::euctou8(str)
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_euc_jp content
|
def encode content
|
||||||
return Uconv::u8toeuc(content)
|
return Uconv::u8toeuc(content)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
rescue LoadError
|
rescue LoadError
|
||||||
begin
|
begin
|
||||||
require 'iconv'
|
require 'iconv'
|
||||||
module REXML
|
module REXML
|
||||||
module Encoding
|
module Encoding
|
||||||
def from_euc_jp(str)
|
def decode(str)
|
||||||
return Iconv::iconv("utf-8", "euc-jp", str).join('')
|
return Iconv::iconv("utf-8", "euc-jp", str)[0]
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_euc_jp content
|
def encode content
|
||||||
return Iconv::iconv("euc-jp", "utf-8", content).join('')
|
return Iconv::iconv("euc-jp", "utf-8", content)[0]
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
rescue LoadError
|
rescue LoadError
|
||||||
raise "uconv or iconv is required for Japanese encoding support."
|
raise "uconv or iconv is required for Japanese encoding support."
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
module REXML
|
|
||||||
module Encoding
|
|
||||||
EUC_JP = 'EUC-JP'
|
|
||||||
claim( EUC_JP )
|
|
||||||
end
|
|
||||||
end
|
|
14
lib/rexml/encodings/ICONV.rb
Normal file
14
lib/rexml/encodings/ICONV.rb
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
require "iconv"
|
||||||
|
raise LoadError unless defined? Iconv
|
||||||
|
|
||||||
|
module REXML
|
||||||
|
module Encoding
|
||||||
|
def decode( str )
|
||||||
|
return Iconv::iconv(UTF_8, @encoding, str)[0]
|
||||||
|
end
|
||||||
|
|
||||||
|
def encode( content )
|
||||||
|
return Iconv::iconv(@encoding, UTF_8, content)[0]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -1,7 +1,7 @@
|
||||||
module REXML
|
module REXML
|
||||||
module Encoding
|
module Encoding
|
||||||
# Convert from UTF-8
|
# Convert from UTF-8
|
||||||
def to_iso_8859_1 content
|
def encode content
|
||||||
array_utf8 = content.unpack('U*')
|
array_utf8 = content.unpack('U*')
|
||||||
array_enc = []
|
array_enc = []
|
||||||
array_utf8.each do |num|
|
array_utf8.each do |num|
|
||||||
|
@ -16,7 +16,7 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
# Convert to UTF-8
|
# Convert to UTF-8
|
||||||
def from_iso_8859_1(str)
|
def decode(str)
|
||||||
str.unpack('C*').pack('U*')
|
str.unpack('C*').pack('U*')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
module REXML
|
|
||||||
module Encoding
|
|
||||||
ISO_8859_1 = 'ISO-8859-1'
|
|
||||||
claim( ISO_8859_1 )
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,6 +0,0 @@
|
||||||
module REXML
|
|
||||||
module Encoding
|
|
||||||
claim( 'Shift-JIS' )
|
|
||||||
claim( 'Shift_JIS' )
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,6 +1,6 @@
|
||||||
module REXML
|
module REXML
|
||||||
module Encoding
|
module Encoding
|
||||||
def to_unile content
|
def encode content
|
||||||
array_utf8 = content.unpack("U*")
|
array_utf8 = content.unpack("U*")
|
||||||
array_enc = []
|
array_enc = []
|
||||||
array_utf8.each do |num|
|
array_utf8.each do |num|
|
||||||
|
@ -15,7 +15,7 @@ module REXML
|
||||||
array_enc.pack('C*')
|
array_enc.pack('C*')
|
||||||
end
|
end
|
||||||
|
|
||||||
def from_unile(str)
|
def decode(str)
|
||||||
array_enc=str.unpack('C*')
|
array_enc=str.unpack('C*')
|
||||||
array_utf8 = []
|
array_utf8 = []
|
||||||
2.step(array_enc.size-1, 2){|i|
|
2.step(array_enc.size-1, 2){|i|
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
module REXML
|
|
||||||
module Encoding
|
|
||||||
UNILE = 'UNILE'
|
|
||||||
claim( UNILE, /^\377\376/ )
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,7 +1,7 @@
|
||||||
module REXML
|
module REXML
|
||||||
module Encoding
|
module Encoding
|
||||||
# Convert from UTF-8
|
# Convert from UTF-8
|
||||||
def to_us_ascii content
|
def encode content
|
||||||
array_utf8 = content.unpack('U*')
|
array_utf8 = content.unpack('U*')
|
||||||
array_enc = []
|
array_enc = []
|
||||||
array_utf8.each do |num|
|
array_utf8.each do |num|
|
||||||
|
@ -16,7 +16,7 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
# Convert to UTF-8
|
# Convert to UTF-8
|
||||||
def from_us_ascii(str)
|
def decode(str)
|
||||||
str.unpack('C*').pack('U*')
|
str.unpack('C*').pack('U*')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
module REXML
|
|
||||||
module Encoding
|
|
||||||
US_ASCII = 'US-ASCII'
|
|
||||||
claim( US_ASCII )
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,6 +1,6 @@
|
||||||
module REXML
|
module REXML
|
||||||
module Encoding
|
module Encoding
|
||||||
def to_utf_16 content
|
def encode content
|
||||||
array_utf8 = content.unpack("U*")
|
array_utf8 = content.unpack("U*")
|
||||||
array_enc = []
|
array_enc = []
|
||||||
array_utf8.each do |num|
|
array_utf8.each do |num|
|
||||||
|
@ -15,7 +15,7 @@ module REXML
|
||||||
array_enc.pack('C*')
|
array_enc.pack('C*')
|
||||||
end
|
end
|
||||||
|
|
||||||
def from_utf_16(str)
|
def decode(str)
|
||||||
array_enc=str.unpack('C*')
|
array_enc=str.unpack('C*')
|
||||||
array_utf8 = []
|
array_utf8 = []
|
||||||
2.step(arrayEnc.size-1, 2){|i|
|
2.step(arrayEnc.size-1, 2){|i|
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
module REXML
|
|
||||||
module Encoding
|
|
||||||
UTF_16 = 'UTF-16'
|
|
||||||
claim( UTF_16, /^\376\377/ )
|
|
||||||
end
|
|
||||||
end
|
|
11
lib/rexml/encodings/UTF-8.rb
Normal file
11
lib/rexml/encodings/UTF-8.rb
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
module REXML
|
||||||
|
module Encoding
|
||||||
|
def encode content
|
||||||
|
content
|
||||||
|
end
|
||||||
|
|
||||||
|
def decode(str)
|
||||||
|
str
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -1,76 +1,58 @@
|
||||||
require 'rexml/xmltokens'
|
require 'rexml/xmltokens'
|
||||||
require 'rexml/light/node'
|
require 'rexml/light/node'
|
||||||
|
|
||||||
# Development model
|
# [ :element, parent, name, attributes, children* ]
|
||||||
# document = Node.new
|
# a = Node.new
|
||||||
|
# a << "B" # => <a>B</a>
|
||||||
# Add an element "foo" to the document
|
# a.b # => <a>B<b/></a>
|
||||||
# foo = document << "foo"
|
# a.b[1] # => <a>B<b/><b/></a>
|
||||||
# # Set attribute "attr" on foo
|
# a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
|
||||||
# foo["attr"] = "la"
|
# a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
|
||||||
# # Set another attribute in a different namespace
|
# a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
|
||||||
# foo["attr", "namespace"] = "too"
|
|
||||||
# # Swap foo into another namespace
|
|
||||||
# foo.namespace = "blah"
|
|
||||||
# # Add a couple of element nodes to foo
|
|
||||||
# foo << "a"
|
|
||||||
# foo << "b"
|
|
||||||
# # Access the children of foo in various ways
|
|
||||||
# a = foo[0]
|
|
||||||
# foo.each { |child|
|
|
||||||
# #...
|
|
||||||
# }
|
|
||||||
# # Add text to foo
|
|
||||||
# # Add instruction
|
|
||||||
# # Add comment
|
|
||||||
# # Get the root of the document
|
|
||||||
# document == a.root
|
|
||||||
# # Write the document out
|
|
||||||
# puts document.to_s
|
|
||||||
module REXML
|
module REXML
|
||||||
module Light
|
module Light
|
||||||
# Represents a tagged XML element. Elements are characterized by
|
# Represents a tagged XML element. Elements are characterized by
|
||||||
# having children, attributes, and names, and can themselves be
|
# having children, attributes, and names, and can themselves be
|
||||||
# children.
|
# children.
|
||||||
class Node < Array
|
class Node
|
||||||
alias :_old_get :[]
|
|
||||||
alias :_old_put :[]=
|
|
||||||
|
|
||||||
NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
|
NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
|
||||||
|
PARENTS = [ :element, :document, :doctype ]
|
||||||
# Create a new element.
|
# Create a new element.
|
||||||
def initialize node=nil
|
def initialize node=nil
|
||||||
|
@node = node
|
||||||
if node.kind_of? String
|
if node.kind_of? String
|
||||||
node = [ :text, node ]
|
node = [ :text, node ]
|
||||||
elsif node.nil?
|
elsif node.nil?
|
||||||
node = [ :document, nil, nil ]
|
node = [ :document, nil, nil ]
|
||||||
elsif node[0] == :start_element
|
elsif node[0] == :start_element
|
||||||
node[0] = :element
|
node[0] = :element
|
||||||
|
elsif node[0] == :start_doctype
|
||||||
|
node[0] = :doctype
|
||||||
|
elsif node[0] == :start_document
|
||||||
|
node[0] = :document
|
||||||
end
|
end
|
||||||
replace( node )
|
|
||||||
_old_put( 1, 0, 1 )
|
|
||||||
_old_put( 1, nil )
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def size
|
def size
|
||||||
el!()
|
if PARENTS.include? @node[0]
|
||||||
super-4
|
@node[-1].size
|
||||||
|
else
|
||||||
|
0
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def each( &block )
|
def each( &block )
|
||||||
el!()
|
|
||||||
size.times { |x| yield( at(x+4) ) }
|
size.times { |x| yield( at(x+4) ) }
|
||||||
end
|
end
|
||||||
|
|
||||||
def name
|
def name
|
||||||
el!()
|
|
||||||
at(2)
|
at(2)
|
||||||
end
|
end
|
||||||
|
|
||||||
def name=( name_str, ns=nil )
|
def name=( name_str, ns=nil )
|
||||||
el!()
|
|
||||||
pfx = ''
|
pfx = ''
|
||||||
pfx = "#{prefix(ns)}:" if ns
|
pfx = "#{prefix(ns)}:" if ns
|
||||||
_old_put(1, "#{pfx}#{name_str}")
|
_old_put(2, "#{pfx}#{name_str}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def parent=( node )
|
def parent=( node )
|
||||||
|
@ -78,28 +60,23 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
def local_name
|
def local_name
|
||||||
el!()
|
|
||||||
namesplit
|
namesplit
|
||||||
@name
|
@name
|
||||||
end
|
end
|
||||||
|
|
||||||
def local_name=( name_str )
|
def local_name=( name_str )
|
||||||
el!()
|
|
||||||
_old_put( 1, "#@prefix:#{name_str}" )
|
_old_put( 1, "#@prefix:#{name_str}" )
|
||||||
end
|
end
|
||||||
|
|
||||||
def prefix( namespace=nil )
|
def prefix( namespace=nil )
|
||||||
el!()
|
|
||||||
prefix_of( self, namespace )
|
prefix_of( self, namespace )
|
||||||
end
|
end
|
||||||
|
|
||||||
def namespace( prefix=prefix() )
|
def namespace( prefix=prefix() )
|
||||||
el!()
|
|
||||||
namespace_of( self, prefix )
|
namespace_of( self, prefix )
|
||||||
end
|
end
|
||||||
|
|
||||||
def namespace=( namespace )
|
def namespace=( namespace )
|
||||||
el!()
|
|
||||||
@prefix = prefix( namespace )
|
@prefix = prefix( namespace )
|
||||||
pfx = ''
|
pfx = ''
|
||||||
pfx = "#@prefix:" if @prefix.size > 0
|
pfx = "#@prefix:" if @prefix.size > 0
|
||||||
|
@ -107,7 +84,6 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
def []( reference, ns=nil )
|
def []( reference, ns=nil )
|
||||||
el!()
|
|
||||||
if reference.kind_of? String
|
if reference.kind_of? String
|
||||||
pfx = ''
|
pfx = ''
|
||||||
pfx = "#{prefix(ns)}:" if ns
|
pfx = "#{prefix(ns)}:" if ns
|
||||||
|
@ -125,7 +101,6 @@ module REXML
|
||||||
|
|
||||||
# Doesn't handle namespaces yet
|
# Doesn't handle namespaces yet
|
||||||
def []=( reference, ns, value=nil )
|
def []=( reference, ns, value=nil )
|
||||||
el!()
|
|
||||||
if reference.kind_of? String
|
if reference.kind_of? String
|
||||||
value = ns unless value
|
value = ns unless value
|
||||||
at( 3 )[reference] = value
|
at( 3 )[reference] = value
|
||||||
|
@ -170,12 +145,10 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
def has_name?( name, namespace = '' )
|
def has_name?( name, namespace = '' )
|
||||||
el!()
|
|
||||||
at(3) == name and namespace() == namespace
|
at(3) == name and namespace() == namespace
|
||||||
end
|
end
|
||||||
|
|
||||||
def children
|
def children
|
||||||
el!()
|
|
||||||
self
|
self
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -187,14 +160,6 @@ module REXML
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def el!
|
|
||||||
if node_type() != :element and node_type() != :document
|
|
||||||
_old_put( 0, :element )
|
|
||||||
push({})
|
|
||||||
end
|
|
||||||
self
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def namesplit
|
def namesplit
|
||||||
|
|
|
@ -8,10 +8,6 @@ module REXML
|
||||||
@output = real_IO
|
@output = real_IO
|
||||||
self.encoding = encd
|
self.encoding = encd
|
||||||
|
|
||||||
eval <<-EOL
|
|
||||||
alias :encode :to_#{encoding.tr('-', '_').downcase}
|
|
||||||
alias :decode :from_#{encoding.tr('-', '_').downcase}
|
|
||||||
EOL
|
|
||||||
@to_utf = encd == UTF_8 ? false : true
|
@to_utf = encd == UTF_8 ? false : true
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
module REXML
|
module REXML
|
||||||
class ParseException < Exception
|
class ParseException < RuntimeError
|
||||||
attr_accessor :source, :parser, :continued_exception
|
attr_accessor :source, :parser, :continued_exception
|
||||||
|
|
||||||
def initialize( message, source=nil, parser=nil, exception=nil )
|
def initialize( message, source=nil, parser=nil, exception=nil )
|
||||||
|
@ -12,9 +12,9 @@ module REXML
|
||||||
def to_s
|
def to_s
|
||||||
# Quote the original exception, if there was one
|
# Quote the original exception, if there was one
|
||||||
if @continued_exception
|
if @continued_exception
|
||||||
err = @continued_exception.message
|
err = @continued_exception.inspect
|
||||||
err << "\n"
|
err << "\n"
|
||||||
err << @continued_exception.backtrace[0..3].join("\n")
|
err << @continued_exception.backtrace.join("\n")
|
||||||
err << "\n...\n"
|
err << "\n...\n"
|
||||||
else
|
else
|
||||||
err = ""
|
err = ""
|
||||||
|
@ -24,17 +24,24 @@ module REXML
|
||||||
err << super
|
err << super
|
||||||
|
|
||||||
# Add contextual information
|
# Add contextual information
|
||||||
err << "\n#{@source.current_line}\nLast 80 unconsumed characters:\n#{@source.buffer[0..80].gsub(/\n/, ' ')}\n" if @source
|
if @source
|
||||||
err << "\nContext:\n#{@parser.context}" if @parser
|
err << "\nLine: #{line}\n"
|
||||||
|
err << "Position: #{position}\n"
|
||||||
|
err << "Last 80 unconsumed characters:\n"
|
||||||
|
err << @source.buffer[0..80].gsub(/\n/, ' ')
|
||||||
|
err << "\n"
|
||||||
|
err << @source.buffer[0..80].unpack("U*").inspect
|
||||||
|
end
|
||||||
|
|
||||||
err
|
err
|
||||||
end
|
end
|
||||||
|
|
||||||
def position
|
def position
|
||||||
@source.current_line[0] if @source
|
@source.current_line[0] if @source and @source.current_line
|
||||||
end
|
end
|
||||||
|
|
||||||
def line
|
def line
|
||||||
@source.current_line[2] if @source
|
@source.current_line[2] if @source and @source.current_line
|
||||||
end
|
end
|
||||||
|
|
||||||
def context
|
def context
|
||||||
|
|
|
@ -89,10 +89,10 @@ module REXML
|
||||||
EREFERENCE = /&(?!#{NAME};)/
|
EREFERENCE = /&(?!#{NAME};)/
|
||||||
|
|
||||||
DEFAULT_ENTITIES = {
|
DEFAULT_ENTITIES = {
|
||||||
'gt' => [/>/, '>', '>'],
|
'gt' => [/>/, '>', '>', />/],
|
||||||
'lt' => [/</, '<', '<'],
|
'lt' => [/</, '<', '<', /</],
|
||||||
'quot' => [/"/, '"', '"'],
|
'quot' => [/"/, '"', '"', /"/],
|
||||||
"apos" => [/'/, "'", "'"]
|
"apos" => [/'/, "'", "'", /'/]
|
||||||
}
|
}
|
||||||
|
|
||||||
def initialize( source )
|
def initialize( source )
|
||||||
|
@ -126,6 +126,7 @@ module REXML
|
||||||
|
|
||||||
# Returns true if there are more events. Synonymous with !empty?
|
# Returns true if there are more events. Synonymous with !empty?
|
||||||
def has_next?
|
def has_next?
|
||||||
|
return true if @closed
|
||||||
@source.read if @source.buffer.size==0 and !@source.empty?
|
@source.read if @source.buffer.size==0 and !@source.empty?
|
||||||
(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
|
(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
|
||||||
end
|
end
|
||||||
|
@ -143,7 +144,7 @@ module REXML
|
||||||
# event, so you can effectively pre-parse the entire document (pull the
|
# event, so you can effectively pre-parse the entire document (pull the
|
||||||
# entire thing into memory) using this method.
|
# entire thing into memory) using this method.
|
||||||
def peek depth=0
|
def peek depth=0
|
||||||
raise 'Illegal argument "#{depth}"' if depth < -1
|
raise %Q[Illegal argument "#{depth}"] if depth < -1
|
||||||
temp = []
|
temp = []
|
||||||
if depth == -1
|
if depth == -1
|
||||||
temp.push(pull()) until empty?
|
temp.push(pull()) until empty?
|
||||||
|
@ -166,8 +167,9 @@ module REXML
|
||||||
return @stack.shift if @stack.size > 0
|
return @stack.shift if @stack.size > 0
|
||||||
@source.read if @source.buffer.size==0
|
@source.read if @source.buffer.size==0
|
||||||
if @document_status == nil
|
if @document_status == nil
|
||||||
@source.match( /^\s*/um, true )
|
@source.consume( /^\s*/um )
|
||||||
word = @source.match( /^\s*(<.*?)>/um )
|
word = @source.match( /(<.*?)>/um )
|
||||||
|
#word = @source.match_to( '>', /(<.*?)>/um )
|
||||||
word = word[1] unless word.nil?
|
word = word[1] unless word.nil?
|
||||||
case word
|
case word
|
||||||
when COMMENT_START
|
when COMMENT_START
|
||||||
|
@ -190,7 +192,7 @@ module REXML
|
||||||
close = md[2]
|
close = md[2]
|
||||||
identity =~ IDENTITY
|
identity =~ IDENTITY
|
||||||
name = $1
|
name = $1
|
||||||
raise "DOCTYPE is missing a name" if name.nil?
|
raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
|
||||||
pub_sys = $2.nil? ? nil : $2.strip
|
pub_sys = $2.nil? ? nil : $2.strip
|
||||||
long_name = $3.nil? ? nil : $3.strip
|
long_name = $3.nil? ? nil : $3.strip
|
||||||
uri = $4.nil? ? nil : $4.strip
|
uri = $4.nil? ? nil : $4.strip
|
||||||
|
@ -274,10 +276,11 @@ module REXML
|
||||||
return [ :end_doctype ]
|
return [ :end_doctype ]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
begin
|
begin
|
||||||
if @source.buffer[0] == ?<
|
if @source.buffer[0] == ?<
|
||||||
if @source.buffer[1] == ?/
|
if @source.buffer[1] == ?/
|
||||||
last_tag = @tags.pop
|
last_tag = @tags.pop
|
||||||
|
#md = @source.match_to_consume( '>', CLOSE_MATCH)
|
||||||
md = @source.match( CLOSE_MATCH, true )
|
md = @source.match( CLOSE_MATCH, true )
|
||||||
raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
|
raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
|
||||||
"(got \"#{md[1]}\")", @source) unless last_tag == md[1]
|
"(got \"#{md[1]}\")", @source) unless last_tag == md[1]
|
||||||
|
@ -286,18 +289,20 @@ module REXML
|
||||||
md = @source.match(/\A(\s*[^>]*>)/um)
|
md = @source.match(/\A(\s*[^>]*>)/um)
|
||||||
#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
||||||
raise REXML::ParseException.new("Malformed node", @source) unless md
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
||||||
case md[1]
|
if md[0][2] == ?-
|
||||||
when CDATA_START
|
md = @source.match( COMMENT_PATTERN, true )
|
||||||
return [ :cdata, @source.match( CDATA_PATTERN, true )[1] ]
|
return [ :comment, md[1] ] if md
|
||||||
when COMMENT_START
|
|
||||||
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
|
|
||||||
else
|
else
|
||||||
raise REXML::ParseException.new( "Declarations can only occur "+
|
md = @source.match( CDATA_PATTERN, true )
|
||||||
"in the doctype declaration.", @source)
|
return [ :cdata, md[1] ] if md
|
||||||
end
|
end
|
||||||
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
||||||
|
"in the doctype declaration.", @source)
|
||||||
elsif @source.buffer[1] == ??
|
elsif @source.buffer[1] == ??
|
||||||
md = @source.match( INSTRUCTION_PATTERN, true )
|
md = @source.match( INSTRUCTION_PATTERN, true )
|
||||||
return [ :processing_instruction, md[1], md[2] ]
|
return [ :processing_instruction, md[1], md[2] ] if md
|
||||||
|
raise REXML::ParseException.new( "Bad instruction declaration",
|
||||||
|
@source)
|
||||||
else
|
else
|
||||||
# Get the next tag
|
# Get the next tag
|
||||||
md = @source.match(TAG_MATCH, true)
|
md = @source.match(TAG_MATCH, true)
|
||||||
|
@ -318,17 +323,19 @@ module REXML
|
||||||
return [ :start_element, md[1], attributes ]
|
return [ :start_element, md[1], attributes ]
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
md = @source.match(TEXT_PATTERN, true)
|
md = @source.match( TEXT_PATTERN, true )
|
||||||
raise "no text to add" if md[0].length == 0
|
#md = @source.match_to_consume( '<', TEXT_PATTERN )
|
||||||
|
#@source.read
|
||||||
|
raise REXML::ParseException("no text to add") if md[0].length == 0
|
||||||
# unnormalized = Text::unnormalize( md[1], self )
|
# unnormalized = Text::unnormalize( md[1], self )
|
||||||
# return PullEvent.new( :text, md[1], unnormalized )
|
# return PullEvent.new( :text, md[1], unnormalized )
|
||||||
return [ :text, md[1] ]
|
return [ :text, md[1] ]
|
||||||
end
|
end
|
||||||
rescue REXML::ParseException
|
rescue REXML::ParseException
|
||||||
raise $!
|
raise
|
||||||
rescue Exception, NameError => error
|
rescue Exception, NameError => error
|
||||||
raise REXML::ParseException.new( "Exception parsing",
|
raise REXML::ParseException.new( "Exception parsing",
|
||||||
@source, self, error )
|
@source, self, (error ? error : $!) )
|
||||||
end
|
end
|
||||||
return [ :dummy ]
|
return [ :dummy ]
|
||||||
end
|
end
|
||||||
|
@ -354,7 +361,7 @@ module REXML
|
||||||
end if entities
|
end if entities
|
||||||
copy.gsub!( EREFERENCE, '&' )
|
copy.gsub!( EREFERENCE, '&' )
|
||||||
DEFAULT_ENTITIES.each do |key, value|
|
DEFAULT_ENTITIES.each do |key, value|
|
||||||
copy.gsub!( value[2], value[1] )
|
copy.gsub!( value[3], value[1] )
|
||||||
end
|
end
|
||||||
copy
|
copy
|
||||||
end
|
end
|
||||||
|
|
|
@ -16,25 +16,25 @@ module REXML
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse
|
def parse
|
||||||
root = context = REXML::Light::Node.new([ :document ])
|
root = context = [ :document ]
|
||||||
while true
|
while true
|
||||||
event = @parser.pull
|
event = @parser.pull
|
||||||
case event[0]
|
case event[0]
|
||||||
when :end_document
|
when :end_document
|
||||||
break
|
break
|
||||||
when :end_doctype
|
when :end_doctype
|
||||||
context = context.parent
|
context = context[1]
|
||||||
when :start_element, :start_doctype
|
when :start_element, :start_doctype
|
||||||
new_node = REXML::Light::Node.new(event)
|
new_node = event
|
||||||
context << new_node
|
context << new_node
|
||||||
new_node.parent = context
|
new_node[1,0] = [context]
|
||||||
context = new_node
|
context = new_node
|
||||||
when :end_element, :end_doctype
|
when :end_element, :end_doctype
|
||||||
context = context.parent
|
context = context[1]
|
||||||
else
|
else
|
||||||
new_node = REXML::Light::Node.new(event)
|
new_node = event
|
||||||
context << new_node
|
context << new_node
|
||||||
new_node.parent = context
|
new_node[1,0] = [context]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
root
|
root
|
||||||
|
|
|
@ -31,7 +31,7 @@ module REXML
|
||||||
results = filter([element], path)
|
results = filter([element], path)
|
||||||
when /^\*/u
|
when /^\*/u
|
||||||
results = filter(element.to_a, path)
|
results = filter(element.to_a, path)
|
||||||
when /^[\[!\w:]/u
|
when /^[[!\w:]/u
|
||||||
# match on child
|
# match on child
|
||||||
matches = []
|
matches = []
|
||||||
children = element.to_a
|
children = element.to_a
|
||||||
|
|
|
@ -21,6 +21,6 @@
|
||||||
# A tutorial is available in docs/tutorial.html
|
# A tutorial is available in docs/tutorial.html
|
||||||
module REXML
|
module REXML
|
||||||
Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
|
Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
|
||||||
Date = "+2003/110"
|
Date = "+2003/283"
|
||||||
Version = "2.7.1"
|
Version = "2.7.2"
|
||||||
end
|
end
|
||||||
|
|
|
@ -39,10 +39,6 @@ module REXML
|
||||||
# Overridden to support optimized en/decoding
|
# Overridden to support optimized en/decoding
|
||||||
def encoding=(enc)
|
def encoding=(enc)
|
||||||
super
|
super
|
||||||
eval <<-EOL
|
|
||||||
alias :encode :to_#{encoding.tr('-', '_').downcase}
|
|
||||||
alias :decode :from_#{encoding.tr('-', '_').downcase}
|
|
||||||
EOL
|
|
||||||
@line_break = encode( '>' )
|
@line_break = encode( '>' )
|
||||||
if enc != UTF_8
|
if enc != UTF_8
|
||||||
@buffer = decode(@buffer)
|
@buffer = decode(@buffer)
|
||||||
|
@ -78,8 +74,22 @@ module REXML
|
||||||
def read
|
def read
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def consume( pattern )
|
||||||
|
@buffer = $' if pattern.match( @buffer )
|
||||||
|
end
|
||||||
|
|
||||||
|
def match_to( char, pattern )
|
||||||
|
return pattern.match(@buffer)
|
||||||
|
end
|
||||||
|
|
||||||
|
def match_to_consume( char, pattern )
|
||||||
|
md = pattern.match(@buffer)
|
||||||
|
@buffer = $'
|
||||||
|
return md
|
||||||
|
end
|
||||||
|
|
||||||
def match pattern, consume=false
|
def match pattern, consume=false
|
||||||
md = pattern.match @buffer
|
md = pattern.match(@buffer)
|
||||||
@buffer = $' if consume and md
|
@buffer = $' if consume and md
|
||||||
return md
|
return md
|
||||||
end
|
end
|
||||||
|
@ -112,7 +122,9 @@ module REXML
|
||||||
#@block_size = block_size
|
#@block_size = block_size
|
||||||
#super @source.read(@block_size)
|
#super @source.read(@block_size)
|
||||||
@line_break = '>'
|
@line_break = '>'
|
||||||
super @source.readline( @line_break )
|
#super @source.readline( "\n" )
|
||||||
|
super @source.readline( @line_break )+@source.read
|
||||||
|
@line_break = encode( '>' )
|
||||||
end
|
end
|
||||||
|
|
||||||
def scan pattern, consume=false
|
def scan pattern, consume=false
|
||||||
|
@ -145,11 +157,15 @@ module REXML
|
||||||
str = @source.readline('>')
|
str = @source.readline('>')
|
||||||
str = decode(str) if @to_utf and str
|
str = decode(str) if @to_utf and str
|
||||||
@buffer << str
|
@buffer << str
|
||||||
rescue
|
rescue Exception, NameError
|
||||||
@source = nil
|
@source = nil
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def consume( pattern )
|
||||||
|
match( pattern, true )
|
||||||
|
end
|
||||||
|
|
||||||
def match pattern, consume=false
|
def match pattern, consume=false
|
||||||
rv = pattern.match(@buffer)
|
rv = pattern.match(@buffer)
|
||||||
@buffer = $' if consume and rv
|
@buffer = $' if consume and rv
|
||||||
|
|
|
@ -2,16 +2,6 @@ require 'rexml/namespace'
|
||||||
require 'rexml/xmltokens'
|
require 'rexml/xmltokens'
|
||||||
require 'rexml/parsers/xpathparser'
|
require 'rexml/parsers/xpathparser'
|
||||||
|
|
||||||
# Ignore this class. It adds a __ne__ method, because Ruby doesn't seem to
|
|
||||||
# understand object.send( "!=", foo ), whereas it *does* understand "<", "==",
|
|
||||||
# and all of the other comparison methods. Stupid, and annoying, and not at
|
|
||||||
# all POLS.
|
|
||||||
class Object
|
|
||||||
def __ne__(b)
|
|
||||||
self != b
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
module REXML
|
module REXML
|
||||||
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
||||||
# for this class. Believe me. You don't want to poke around in here.
|
# for this class. Believe me. You don't want to poke around in here.
|
||||||
|
@ -132,11 +122,10 @@ module REXML
|
||||||
when :child
|
when :child
|
||||||
#puts "CHILD"
|
#puts "CHILD"
|
||||||
new_nodeset = []
|
new_nodeset = []
|
||||||
ps_clone = nil
|
nt = nil
|
||||||
for node in nodeset
|
for node in nodeset
|
||||||
#ps_clone = path_stack.clone
|
nt = node.node_type
|
||||||
#new_nodeset += internal_parse( ps_clone, node.children ) if node.parent?
|
new_nodeset += node.children if nt == :element or nt == :document
|
||||||
new_nodeset += node.children if node.parent?
|
|
||||||
end
|
end
|
||||||
#path_stack[0,(path_stack.size-ps_clone.size)] = []
|
#path_stack[0,(path_stack.size-ps_clone.size)] = []
|
||||||
return new_nodeset
|
return new_nodeset
|
||||||
|
@ -238,9 +227,11 @@ module REXML
|
||||||
when :descendant
|
when :descendant
|
||||||
#puts ":DESCENDANT"
|
#puts ":DESCENDANT"
|
||||||
results = []
|
results = []
|
||||||
|
nt = nil
|
||||||
for node in nodeset
|
for node in nodeset
|
||||||
|
nt = node.node_type
|
||||||
results += internal_parse( path_stack.clone.unshift( :descendant_or_self ),
|
results += internal_parse( path_stack.clone.unshift( :descendant_or_self ),
|
||||||
node.children ) if node.parent?
|
node.children ) if nt == :element or nt == :document
|
||||||
end
|
end
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
@ -310,11 +301,13 @@ module REXML
|
||||||
def d_o_s( p, ns, r )
|
def d_o_s( p, ns, r )
|
||||||
#puts r.collect{|n|n.to_s}.inspect
|
#puts r.collect{|n|n.to_s}.inspect
|
||||||
#puts ns.collect{|n|n.to_s}.inspect
|
#puts ns.collect{|n|n.to_s}.inspect
|
||||||
|
nt = nil
|
||||||
ns.each_index do |i|
|
ns.each_index do |i|
|
||||||
n = ns[i]
|
n = ns[i]
|
||||||
x = match( p.clone, [ n ] )
|
x = match( p.clone, [ n ] )
|
||||||
#puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
|
#puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
|
||||||
d_o_s( p, n.children, x ) if n.parent?
|
nt = n.node_type
|
||||||
|
d_o_s( p, n.children, x ) if nt == :element or nt == :document
|
||||||
r[i,0] = [x] if x.size > 0
|
r[i,0] = [x] if x.size > 0
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Reference in a new issue