mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* Changes to the encoding mechanism. If iconv is found, it is used first
  for encoding changes. This should be the case on all 1.8 installations.
  When it isn't found (<1.6), the native REXML encoding mechanism is used.
  This cleaned out some files, and tightened up the code a bit; and iconv
  should be faster than the pure Ruby code.
* Changed deprecated assert_not_nil to assert throughout the tests.
* Parse exceptions are a little more verbose, and extend RuntimeError.
* Bug fixes to XPathParser
* The Light API is still shifting, like the sands of the desert.
* Fixed a new Ruby 1.8.0 warning, added some speed optimizations, and
  tightened error reporting in the base parser

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
662532be00
commit
7d21c237cc
23 changed files with 185 additions and 224 deletions
|
@ -2,61 +2,49 @@ module REXML
|
|||
module Encoding
|
||||
@@uconv_available = false
|
||||
|
||||
ENCODING_CLAIMS = { }
|
||||
|
||||
def Encoding.claim( encoding_str, match=nil )
|
||||
if match
|
||||
ENCODING_CLAIMS[ match ] = encoding_str
|
||||
else
|
||||
ENCODING_CLAIMS[ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])#{encoding_str}\2/i ] = encoding_str
|
||||
end
|
||||
end
|
||||
|
||||
# Native, default format is UTF-8, so it is declared here rather than in
|
||||
# an encodings/ definition.
|
||||
UTF_8 = 'UTF-8'
|
||||
claim( UTF_8 )
|
||||
UTF_16 = 'UTF-16'
|
||||
UNILE = 'UNILE'
|
||||
|
||||
# ID ---> Encoding name
|
||||
attr_reader :encoding
|
||||
def encoding=( enc )
|
||||
enc = UTF_8 unless enc
|
||||
old_verbosity = $VERBOSE
|
||||
begin
|
||||
$VERBOSE = false
|
||||
return if defined? @encoding and enc == @encoding
|
||||
if enc and enc != UTF_8
|
||||
@encoding = enc.upcase
|
||||
require "rexml/encodings/#@encoding" unless @encoding == UTF_8
|
||||
begin
|
||||
load 'rexml/encodings/ICONV.rb'
|
||||
Iconv::iconv( UTF_8, @encoding, "" )
|
||||
rescue LoadError, Exception => err
|
||||
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
|
||||
begin
|
||||
load enc_file
|
||||
rescue LoadError
|
||||
raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
|
||||
end
|
||||
end
|
||||
else
|
||||
enc = UTF_8
|
||||
@encoding = enc.upcase
|
||||
load 'rexml/encodings/UTF-8.rb'
|
||||
end
|
||||
ensure
|
||||
$VERBOSE = old_verbosity
|
||||
end
|
||||
end
|
||||
|
||||
def check_encoding str
|
||||
rv = ENCODING_CLAIMS.find{|k,v| str =~ k }
|
||||
# Raise an exception if there is a declared encoding and we don't
|
||||
# recognize it
|
||||
unless rv
|
||||
if str =~ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])(.*?)\2/
|
||||
raise "A matching encoding handler was not found for encoding '#{$3}', or the encoding handler failed to load due to a missing support library (such as uconv)."
|
||||
else
|
||||
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
|
||||
return UTF_16 if str[0] == 254 && str[1] == 255
|
||||
return UNILE if str[0] == 255 && str[1] == 254
|
||||
str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
|
||||
return $1.upcase if $1
|
||||
return UTF_8
|
||||
end
|
||||
end
|
||||
return rv[1]
|
||||
end
|
||||
|
||||
def to_utf_8(str)
|
||||
return str
|
||||
end
|
||||
|
||||
def from_utf_8 content
|
||||
return content
|
||||
end
|
||||
end
|
||||
|
||||
module Encodingses
|
||||
encodings = []
|
||||
$:.each do |incl_dir|
|
||||
if Dir[ File.join(incl_dir, 'rexml', 'encodings') ].size > 0
|
||||
encodings |= Dir[ File.join(incl_dir, 'rexml', 'encodings', '*_decl.rb') ]
|
||||
end
|
||||
encodings.collect!{ |f| File.basename(f) }
|
||||
encodings.uniq!
|
||||
end
|
||||
encodings.each { |enc| require "rexml/encodings/#{enc}" }
|
||||
end
|
||||
end
|
||||
|
|
|
@ -3,11 +3,11 @@ begin
|
|||
|
||||
module REXML
|
||||
module Encoding
|
||||
def from_euc_jp(str)
|
||||
def decode(str)
|
||||
return Uconv::euctou8(str)
|
||||
end
|
||||
|
||||
def to_euc_jp content
|
||||
def encode content
|
||||
return Uconv::u8toeuc(content)
|
||||
end
|
||||
end
|
||||
|
@ -17,12 +17,12 @@ rescue LoadError
|
|||
require 'iconv'
|
||||
module REXML
|
||||
module Encoding
|
||||
def from_euc_jp(str)
|
||||
return Iconv::iconv("utf-8", "euc-jp", str).join('')
|
||||
def decode(str)
|
||||
return Iconv::iconv("utf-8", "euc-jp", str)[0]
|
||||
end
|
||||
|
||||
def to_euc_jp content
|
||||
return Iconv::iconv("euc-jp", "utf-8", content).join('')
|
||||
def encode content
|
||||
return Iconv::iconv("euc-jp", "utf-8", content)[0]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
EUC_JP = 'EUC-JP'
|
||||
claim( EUC_JP )
|
||||
end
|
||||
end
|
14
lib/rexml/encodings/ICONV.rb
Normal file
14
lib/rexml/encodings/ICONV.rb
Normal file
|
@ -0,0 +1,14 @@
|
|||
require "iconv"
|
||||
raise LoadError unless defined? Iconv
|
||||
|
||||
module REXML
|
||||
module Encoding
|
||||
def decode( str )
|
||||
return Iconv::iconv(UTF_8, @encoding, str)[0]
|
||||
end
|
||||
|
||||
def encode( content )
|
||||
return Iconv::iconv(@encoding, UTF_8, content)[0]
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,7 +1,7 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
# Convert from UTF-8
|
||||
def to_iso_8859_1 content
|
||||
def encode content
|
||||
array_utf8 = content.unpack('U*')
|
||||
array_enc = []
|
||||
array_utf8.each do |num|
|
||||
|
@ -16,7 +16,7 @@ module REXML
|
|||
end
|
||||
|
||||
# Convert to UTF-8
|
||||
def from_iso_8859_1(str)
|
||||
def decode(str)
|
||||
str.unpack('C*').pack('U*')
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
ISO_8859_1 = 'ISO-8859-1'
|
||||
claim( ISO_8859_1 )
|
||||
end
|
||||
end
|
|
@ -1,6 +0,0 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
claim( 'Shift-JIS' )
|
||||
claim( 'Shift_JIS' )
|
||||
end
|
||||
end
|
|
@ -1,6 +1,6 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
def to_unile content
|
||||
def encode content
|
||||
array_utf8 = content.unpack("U*")
|
||||
array_enc = []
|
||||
array_utf8.each do |num|
|
||||
|
@ -15,7 +15,7 @@ module REXML
|
|||
array_enc.pack('C*')
|
||||
end
|
||||
|
||||
def from_unile(str)
|
||||
def decode(str)
|
||||
array_enc=str.unpack('C*')
|
||||
array_utf8 = []
|
||||
2.step(array_enc.size-1, 2){|i|
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
UNILE = 'UNILE'
|
||||
claim( UNILE, /^\377\376/ )
|
||||
end
|
||||
end
|
|
@ -1,7 +1,7 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
# Convert from UTF-8
|
||||
def to_us_ascii content
|
||||
def encode content
|
||||
array_utf8 = content.unpack('U*')
|
||||
array_enc = []
|
||||
array_utf8.each do |num|
|
||||
|
@ -16,7 +16,7 @@ module REXML
|
|||
end
|
||||
|
||||
# Convert to UTF-8
|
||||
def from_us_ascii(str)
|
||||
def decode(str)
|
||||
str.unpack('C*').pack('U*')
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
US_ASCII = 'US-ASCII'
|
||||
claim( US_ASCII )
|
||||
end
|
||||
end
|
|
@ -1,6 +1,6 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
def to_utf_16 content
|
||||
def encode content
|
||||
array_utf8 = content.unpack("U*")
|
||||
array_enc = []
|
||||
array_utf8.each do |num|
|
||||
|
@ -15,7 +15,7 @@ module REXML
|
|||
array_enc.pack('C*')
|
||||
end
|
||||
|
||||
def from_utf_16(str)
|
||||
def decode(str)
|
||||
array_enc=str.unpack('C*')
|
||||
array_utf8 = []
|
||||
2.step(arrayEnc.size-1, 2){|i|
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
UTF_16 = 'UTF-16'
|
||||
claim( UTF_16, /^\376\377/ )
|
||||
end
|
||||
end
|
11
lib/rexml/encodings/UTF-8.rb
Normal file
11
lib/rexml/encodings/UTF-8.rb
Normal file
|
@ -0,0 +1,11 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
def encode content
|
||||
content
|
||||
end
|
||||
|
||||
def decode(str)
|
||||
str
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,76 +1,58 @@
|
|||
require 'rexml/xmltokens'
|
||||
require 'rexml/light/node'
|
||||
|
||||
# Development model
|
||||
# document = Node.new
|
||||
|
||||
# Add an element "foo" to the document
|
||||
# foo = document << "foo"
|
||||
# # Set attribute "attr" on foo
|
||||
# foo["attr"] = "la"
|
||||
# # Set another attribute in a different namespace
|
||||
# foo["attr", "namespace"] = "too"
|
||||
# # Swap foo into another namespace
|
||||
# foo.namespace = "blah"
|
||||
# # Add a couple of element nodes to foo
|
||||
# foo << "a"
|
||||
# foo << "b"
|
||||
# # Access the children of foo in various ways
|
||||
# a = foo[0]
|
||||
# foo.each { |child|
|
||||
# #...
|
||||
# }
|
||||
# # Add text to foo
|
||||
# # Add instruction
|
||||
# # Add comment
|
||||
# # Get the root of the document
|
||||
# document == a.root
|
||||
# # Write the document out
|
||||
# puts document.to_s
|
||||
# [ :element, parent, name, attributes, children* ]
|
||||
# a = Node.new
|
||||
# a << "B" # => <a>B</a>
|
||||
# a.b # => <a>B<b/></a>
|
||||
# a.b[1] # => <a>B<b/><b/></a>
|
||||
# a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
|
||||
# a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
|
||||
# a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
|
||||
module REXML
|
||||
module Light
|
||||
# Represents a tagged XML element. Elements are characterized by
|
||||
# having children, attributes, and names, and can themselves be
|
||||
# children.
|
||||
class Node < Array
|
||||
alias :_old_get :[]
|
||||
alias :_old_put :[]=
|
||||
|
||||
class Node
|
||||
NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
|
||||
PARENTS = [ :element, :document, :doctype ]
|
||||
# Create a new element.
|
||||
def initialize node=nil
|
||||
@node = node
|
||||
if node.kind_of? String
|
||||
node = [ :text, node ]
|
||||
elsif node.nil?
|
||||
node = [ :document, nil, nil ]
|
||||
elsif node[0] == :start_element
|
||||
node[0] = :element
|
||||
elsif node[0] == :start_doctype
|
||||
node[0] = :doctype
|
||||
elsif node[0] == :start_document
|
||||
node[0] = :document
|
||||
end
|
||||
replace( node )
|
||||
_old_put( 1, 0, 1 )
|
||||
_old_put( 1, nil )
|
||||
end
|
||||
|
||||
def size
|
||||
el!()
|
||||
super-4
|
||||
if PARENTS.include? @node[0]
|
||||
@node[-1].size
|
||||
else
|
||||
0
|
||||
end
|
||||
end
|
||||
|
||||
def each( &block )
|
||||
el!()
|
||||
size.times { |x| yield( at(x+4) ) }
|
||||
end
|
||||
|
||||
def name
|
||||
el!()
|
||||
at(2)
|
||||
end
|
||||
|
||||
def name=( name_str, ns=nil )
|
||||
el!()
|
||||
pfx = ''
|
||||
pfx = "#{prefix(ns)}:" if ns
|
||||
_old_put(1, "#{pfx}#{name_str}")
|
||||
_old_put(2, "#{pfx}#{name_str}")
|
||||
end
|
||||
|
||||
def parent=( node )
|
||||
|
@ -78,28 +60,23 @@ module REXML
|
|||
end
|
||||
|
||||
def local_name
|
||||
el!()
|
||||
namesplit
|
||||
@name
|
||||
end
|
||||
|
||||
def local_name=( name_str )
|
||||
el!()
|
||||
_old_put( 1, "#@prefix:#{name_str}" )
|
||||
end
|
||||
|
||||
def prefix( namespace=nil )
|
||||
el!()
|
||||
prefix_of( self, namespace )
|
||||
end
|
||||
|
||||
def namespace( prefix=prefix() )
|
||||
el!()
|
||||
namespace_of( self, prefix )
|
||||
end
|
||||
|
||||
def namespace=( namespace )
|
||||
el!()
|
||||
@prefix = prefix( namespace )
|
||||
pfx = ''
|
||||
pfx = "#@prefix:" if @prefix.size > 0
|
||||
|
@ -107,7 +84,6 @@ module REXML
|
|||
end
|
||||
|
||||
def []( reference, ns=nil )
|
||||
el!()
|
||||
if reference.kind_of? String
|
||||
pfx = ''
|
||||
pfx = "#{prefix(ns)}:" if ns
|
||||
|
@ -125,7 +101,6 @@ module REXML
|
|||
|
||||
# Doesn't handle namespaces yet
|
||||
def []=( reference, ns, value=nil )
|
||||
el!()
|
||||
if reference.kind_of? String
|
||||
value = ns unless value
|
||||
at( 3 )[reference] = value
|
||||
|
@ -170,12 +145,10 @@ module REXML
|
|||
end
|
||||
|
||||
def has_name?( name, namespace = '' )
|
||||
el!()
|
||||
at(3) == name and namespace() == namespace
|
||||
end
|
||||
|
||||
def children
|
||||
el!()
|
||||
self
|
||||
end
|
||||
|
||||
|
@ -187,14 +160,6 @@ module REXML
|
|||
|
||||
end
|
||||
|
||||
def el!
|
||||
if node_type() != :element and node_type() != :document
|
||||
_old_put( 0, :element )
|
||||
push({})
|
||||
end
|
||||
self
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def namesplit
|
||||
|
|
|
@ -8,10 +8,6 @@ module REXML
|
|||
@output = real_IO
|
||||
self.encoding = encd
|
||||
|
||||
eval <<-EOL
|
||||
alias :encode :to_#{encoding.tr('-', '_').downcase}
|
||||
alias :decode :from_#{encoding.tr('-', '_').downcase}
|
||||
EOL
|
||||
@to_utf = encd == UTF_8 ? false : true
|
||||
end
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
module REXML
|
||||
class ParseException < Exception
|
||||
class ParseException < RuntimeError
|
||||
attr_accessor :source, :parser, :continued_exception
|
||||
|
||||
def initialize( message, source=nil, parser=nil, exception=nil )
|
||||
|
@ -12,9 +12,9 @@ module REXML
|
|||
def to_s
|
||||
# Quote the original exception, if there was one
|
||||
if @continued_exception
|
||||
err = @continued_exception.message
|
||||
err = @continued_exception.inspect
|
||||
err << "\n"
|
||||
err << @continued_exception.backtrace[0..3].join("\n")
|
||||
err << @continued_exception.backtrace.join("\n")
|
||||
err << "\n...\n"
|
||||
else
|
||||
err = ""
|
||||
|
@ -24,17 +24,24 @@ module REXML
|
|||
err << super
|
||||
|
||||
# Add contextual information
|
||||
err << "\n#{@source.current_line}\nLast 80 unconsumed characters:\n#{@source.buffer[0..80].gsub(/\n/, ' ')}\n" if @source
|
||||
err << "\nContext:\n#{@parser.context}" if @parser
|
||||
if @source
|
||||
err << "\nLine: #{line}\n"
|
||||
err << "Position: #{position}\n"
|
||||
err << "Last 80 unconsumed characters:\n"
|
||||
err << @source.buffer[0..80].gsub(/\n/, ' ')
|
||||
err << "\n"
|
||||
err << @source.buffer[0..80].unpack("U*").inspect
|
||||
end
|
||||
|
||||
err
|
||||
end
|
||||
|
||||
def position
|
||||
@source.current_line[0] if @source
|
||||
@source.current_line[0] if @source and @source.current_line
|
||||
end
|
||||
|
||||
def line
|
||||
@source.current_line[2] if @source
|
||||
@source.current_line[2] if @source and @source.current_line
|
||||
end
|
||||
|
||||
def context
|
||||
|
|
|
@ -89,10 +89,10 @@ module REXML
|
|||
EREFERENCE = /&(?!#{NAME};)/
|
||||
|
||||
DEFAULT_ENTITIES = {
|
||||
'gt' => [/>/, '>', '>'],
|
||||
'lt' => [/</, '<', '<'],
|
||||
'quot' => [/"/, '"', '"'],
|
||||
"apos" => [/'/, "'", "'"]
|
||||
'gt' => [/>/, '>', '>', />/],
|
||||
'lt' => [/</, '<', '<', /</],
|
||||
'quot' => [/"/, '"', '"', /"/],
|
||||
"apos" => [/'/, "'", "'", /'/]
|
||||
}
|
||||
|
||||
def initialize( source )
|
||||
|
@ -126,6 +126,7 @@ module REXML
|
|||
|
||||
# Returns true if there are more events. Synonymous with !empty?
|
||||
def has_next?
|
||||
return true if @closed
|
||||
@source.read if @source.buffer.size==0 and !@source.empty?
|
||||
(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
|
||||
end
|
||||
|
@ -143,7 +144,7 @@ module REXML
|
|||
# event, so you can effectively pre-parse the entire document (pull the
|
||||
# entire thing into memory) using this method.
|
||||
def peek depth=0
|
||||
raise 'Illegal argument "#{depth}"' if depth < -1
|
||||
raise %Q[Illegal argument "#{depth}"] if depth < -1
|
||||
temp = []
|
||||
if depth == -1
|
||||
temp.push(pull()) until empty?
|
||||
|
@ -166,8 +167,9 @@ module REXML
|
|||
return @stack.shift if @stack.size > 0
|
||||
@source.read if @source.buffer.size==0
|
||||
if @document_status == nil
|
||||
@source.match( /^\s*/um, true )
|
||||
word = @source.match( /^\s*(<.*?)>/um )
|
||||
@source.consume( /^\s*/um )
|
||||
word = @source.match( /(<.*?)>/um )
|
||||
#word = @source.match_to( '>', /(<.*?)>/um )
|
||||
word = word[1] unless word.nil?
|
||||
case word
|
||||
when COMMENT_START
|
||||
|
@ -190,7 +192,7 @@ module REXML
|
|||
close = md[2]
|
||||
identity =~ IDENTITY
|
||||
name = $1
|
||||
raise "DOCTYPE is missing a name" if name.nil?
|
||||
raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
|
||||
pub_sys = $2.nil? ? nil : $2.strip
|
||||
long_name = $3.nil? ? nil : $3.strip
|
||||
uri = $4.nil? ? nil : $4.strip
|
||||
|
@ -278,6 +280,7 @@ module REXML
|
|||
if @source.buffer[0] == ?<
|
||||
if @source.buffer[1] == ?/
|
||||
last_tag = @tags.pop
|
||||
#md = @source.match_to_consume( '>', CLOSE_MATCH)
|
||||
md = @source.match( CLOSE_MATCH, true )
|
||||
raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
|
||||
"(got \"#{md[1]}\")", @source) unless last_tag == md[1]
|
||||
|
@ -286,18 +289,20 @@ module REXML
|
|||
md = @source.match(/\A(\s*[^>]*>)/um)
|
||||
#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
||||
raise REXML::ParseException.new("Malformed node", @source) unless md
|
||||
case md[1]
|
||||
when CDATA_START
|
||||
return [ :cdata, @source.match( CDATA_PATTERN, true )[1] ]
|
||||
when COMMENT_START
|
||||
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
|
||||
if md[0][2] == ?-
|
||||
md = @source.match( COMMENT_PATTERN, true )
|
||||
return [ :comment, md[1] ] if md
|
||||
else
|
||||
md = @source.match( CDATA_PATTERN, true )
|
||||
return [ :cdata, md[1] ] if md
|
||||
end
|
||||
raise REXML::ParseException.new( "Declarations can only occur "+
|
||||
"in the doctype declaration.", @source)
|
||||
end
|
||||
elsif @source.buffer[1] == ??
|
||||
md = @source.match( INSTRUCTION_PATTERN, true )
|
||||
return [ :processing_instruction, md[1], md[2] ]
|
||||
return [ :processing_instruction, md[1], md[2] ] if md
|
||||
raise REXML::ParseException.new( "Bad instruction declaration",
|
||||
@source)
|
||||
else
|
||||
# Get the next tag
|
||||
md = @source.match(TAG_MATCH, true)
|
||||
|
@ -319,16 +324,18 @@ module REXML
|
|||
end
|
||||
else
|
||||
md = @source.match( TEXT_PATTERN, true )
|
||||
raise "no text to add" if md[0].length == 0
|
||||
#md = @source.match_to_consume( '<', TEXT_PATTERN )
|
||||
#@source.read
|
||||
raise REXML::ParseException("no text to add") if md[0].length == 0
|
||||
# unnormalized = Text::unnormalize( md[1], self )
|
||||
# return PullEvent.new( :text, md[1], unnormalized )
|
||||
return [ :text, md[1] ]
|
||||
end
|
||||
rescue REXML::ParseException
|
||||
raise $!
|
||||
raise
|
||||
rescue Exception, NameError => error
|
||||
raise REXML::ParseException.new( "Exception parsing",
|
||||
@source, self, error )
|
||||
@source, self, (error ? error : $!) )
|
||||
end
|
||||
return [ :dummy ]
|
||||
end
|
||||
|
@ -354,7 +361,7 @@ module REXML
|
|||
end if entities
|
||||
copy.gsub!( EREFERENCE, '&' )
|
||||
DEFAULT_ENTITIES.each do |key, value|
|
||||
copy.gsub!( value[2], value[1] )
|
||||
copy.gsub!( value[3], value[1] )
|
||||
end
|
||||
copy
|
||||
end
|
||||
|
|
|
@ -16,25 +16,25 @@ module REXML
|
|||
end
|
||||
|
||||
def parse
|
||||
root = context = REXML::Light::Node.new([ :document ])
|
||||
root = context = [ :document ]
|
||||
while true
|
||||
event = @parser.pull
|
||||
case event[0]
|
||||
when :end_document
|
||||
break
|
||||
when :end_doctype
|
||||
context = context.parent
|
||||
context = context[1]
|
||||
when :start_element, :start_doctype
|
||||
new_node = REXML::Light::Node.new(event)
|
||||
new_node = event
|
||||
context << new_node
|
||||
new_node.parent = context
|
||||
new_node[1,0] = [context]
|
||||
context = new_node
|
||||
when :end_element, :end_doctype
|
||||
context = context.parent
|
||||
context = context[1]
|
||||
else
|
||||
new_node = REXML::Light::Node.new(event)
|
||||
new_node = event
|
||||
context << new_node
|
||||
new_node.parent = context
|
||||
new_node[1,0] = [context]
|
||||
end
|
||||
end
|
||||
root
|
||||
|
|
|
@ -31,7 +31,7 @@ module REXML
|
|||
results = filter([element], path)
|
||||
when /^\*/u
|
||||
results = filter(element.to_a, path)
|
||||
when /^[\[!\w:]/u
|
||||
when /^[[!\w:]/u
|
||||
# match on child
|
||||
matches = []
|
||||
children = element.to_a
|
||||
|
|
|
@ -21,6 +21,6 @@
|
|||
# A tutorial is available in docs/tutorial.html
|
||||
module REXML
|
||||
Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
|
||||
Date = "+2003/110"
|
||||
Version = "2.7.1"
|
||||
Date = "+2003/283"
|
||||
Version = "2.7.2"
|
||||
end
|
||||
|
|
|
@ -39,10 +39,6 @@ module REXML
|
|||
# Overridden to support optimized en/decoding
|
||||
def encoding=(enc)
|
||||
super
|
||||
eval <<-EOL
|
||||
alias :encode :to_#{encoding.tr('-', '_').downcase}
|
||||
alias :decode :from_#{encoding.tr('-', '_').downcase}
|
||||
EOL
|
||||
@line_break = encode( '>' )
|
||||
if enc != UTF_8
|
||||
@buffer = decode(@buffer)
|
||||
|
@ -78,8 +74,22 @@ module REXML
|
|||
def read
|
||||
end
|
||||
|
||||
def consume( pattern )
|
||||
@buffer = $' if pattern.match( @buffer )
|
||||
end
|
||||
|
||||
def match_to( char, pattern )
|
||||
return pattern.match(@buffer)
|
||||
end
|
||||
|
||||
def match_to_consume( char, pattern )
|
||||
md = pattern.match(@buffer)
|
||||
@buffer = $'
|
||||
return md
|
||||
end
|
||||
|
||||
def match pattern, consume=false
|
||||
md = pattern.match @buffer
|
||||
md = pattern.match(@buffer)
|
||||
@buffer = $' if consume and md
|
||||
return md
|
||||
end
|
||||
|
@ -112,7 +122,9 @@ module REXML
|
|||
#@block_size = block_size
|
||||
#super @source.read(@block_size)
|
||||
@line_break = '>'
|
||||
super @source.readline( @line_break )
|
||||
#super @source.readline( "\n" )
|
||||
super @source.readline( @line_break )+@source.read
|
||||
@line_break = encode( '>' )
|
||||
end
|
||||
|
||||
def scan pattern, consume=false
|
||||
|
@ -145,11 +157,15 @@ module REXML
|
|||
str = @source.readline('>')
|
||||
str = decode(str) if @to_utf and str
|
||||
@buffer << str
|
||||
rescue
|
||||
rescue Exception, NameError
|
||||
@source = nil
|
||||
end
|
||||
end
|
||||
|
||||
def consume( pattern )
|
||||
match( pattern, true )
|
||||
end
|
||||
|
||||
def match pattern, consume=false
|
||||
rv = pattern.match(@buffer)
|
||||
@buffer = $' if consume and rv
|
||||
|
|
|
@ -2,16 +2,6 @@ require 'rexml/namespace'
|
|||
require 'rexml/xmltokens'
|
||||
require 'rexml/parsers/xpathparser'
|
||||
|
||||
# Ignore this class. It adds a __ne__ method, because Ruby doesn't seem to
|
||||
# understand object.send( "!=", foo ), whereas it *does* understand "<", "==",
|
||||
# and all of the other comparison methods. Stupid, and annoying, and not at
|
||||
# all POLS.
|
||||
class Object
|
||||
def __ne__(b)
|
||||
self != b
|
||||
end
|
||||
end
|
||||
|
||||
module REXML
|
||||
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
||||
# for this class. Believe me. You don't want to poke around in here.
|
||||
|
@ -132,11 +122,10 @@ module REXML
|
|||
when :child
|
||||
#puts "CHILD"
|
||||
new_nodeset = []
|
||||
ps_clone = nil
|
||||
nt = nil
|
||||
for node in nodeset
|
||||
#ps_clone = path_stack.clone
|
||||
#new_nodeset += internal_parse( ps_clone, node.children ) if node.parent?
|
||||
new_nodeset += node.children if node.parent?
|
||||
nt = node.node_type
|
||||
new_nodeset += node.children if nt == :element or nt == :document
|
||||
end
|
||||
#path_stack[0,(path_stack.size-ps_clone.size)] = []
|
||||
return new_nodeset
|
||||
|
@ -238,9 +227,11 @@ module REXML
|
|||
when :descendant
|
||||
#puts ":DESCENDANT"
|
||||
results = []
|
||||
nt = nil
|
||||
for node in nodeset
|
||||
nt = node.node_type
|
||||
results += internal_parse( path_stack.clone.unshift( :descendant_or_self ),
|
||||
node.children ) if node.parent?
|
||||
node.children ) if nt == :element or nt == :document
|
||||
end
|
||||
return results
|
||||
|
||||
|
@ -310,11 +301,13 @@ module REXML
|
|||
def d_o_s( p, ns, r )
|
||||
#puts r.collect{|n|n.to_s}.inspect
|
||||
#puts ns.collect{|n|n.to_s}.inspect
|
||||
nt = nil
|
||||
ns.each_index do |i|
|
||||
n = ns[i]
|
||||
x = match( p.clone, [ n ] )
|
||||
#puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
|
||||
d_o_s( p, n.children, x ) if n.parent?
|
||||
nt = n.node_type
|
||||
d_o_s( p, n.children, x ) if nt == :element or nt == :document
|
||||
r[i,0] = [x] if x.size > 0
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Reference in a new issue