From bef1521876fe0e96853b28b0775839148b412056 Mon Sep 17 00:00:00 2001 From: ser Date: Thu, 10 Jun 2004 02:09:37 +0000 Subject: [PATCH] This is the log for the *previous* commit, but CVS is bloody stupid. * Added XPath expansion and abbreviation to Parsers::XPathParser * Improved the look of Element.inspect * Added xpath() to Element and Attribute, allowing the generation of a unique xpath for nodes of these types. This method for the other nodes still needs to be done * Made REXML::XPathParser#match public First pass at validation support. Minimal RelaxNG support. * The tree parser is now an independent parser, like the rest. * The first basic RelaxNG support is in. It supports elements, attributes, choice, sequence, oneOrMany, zeroOrMany, and optional. Improved support for converting XPaths to strings. * XPath wasn't parsing ")" correctly. Validation improvements: * Fixed text * Fixed attributes in choices * Fixed text in choices. This change improves handling of all events that occur without an end step (which is most of them). * Fixed a bunch of cases * Added support for * Added support for Workin' in the coal mine, goin' down, down, down... * Entirely rewrote the validation code; the finite state machine, while cool, didn't survive the encounter with Interleave. It was getting sort of hacky, too. The new mechanism is less elegant, but is basically still a FSM, and is more flexible without having to add hacks to extend it. Large chunks of the FSM may be reusable in other validation mechanisms. * Added interleave support * Added support for mixed * Added Kou's patch to normalize attribute values passed through the SAX2 and Stream parsers. 
* Applied Kou's preceding-sibling patch, which fixes the order of the axe results git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6442 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- MANIFEST | 1 + lib/rexml/parsers/treeparser.rb | 88 +++++++++++++++++++++++++++++++++ lib/rexml/rexml.rb | 1 + 3 files changed, 90 insertions(+) create mode 100644 lib/rexml/parsers/treeparser.rb diff --git a/MANIFEST b/MANIFEST index ae9515727c..27cdccf19c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -332,6 +332,7 @@ lib/rexml/parsers/baseparser.rb lib/rexml/parsers/lightparser.rb lib/rexml/parsers/pullparser.rb lib/rexml/parsers/sax2parser.rb +lib/rexml/parsers/treeparser.rb lib/rexml/parsers/streamparser.rb lib/rexml/parsers/ultralightparser.rb lib/rexml/parsers/xpathparser.rb diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb new file mode 100644 index 0000000000..931da73025 --- /dev/null +++ b/lib/rexml/parsers/treeparser.rb @@ -0,0 +1,88 @@ +module REXML + module Parsers + class TreeParser + def initialize( source, build_context = Document.new ) + @build_context = build_context + @parser = Parsers::BaseParser.new( source ) + end + + def add_listener( listener ) + @parser.add_listener( listener ) + end + + def parse + tag_stack = [] + in_doctype = false + entities = nil + begin + while true + event = @parser.pull + case event[0] + when :end_document + return + when :start_element + tag_stack.push(event[1]) + # find the observers for namespaces + @build_context = @build_context.add_element( event[1], event[2] ) + when :end_element + tag_stack.pop + @build_context = @build_context.parent + when :text + if not in_doctype + if @build_context[-1].instance_of? 
Text + @build_context[-1] << event[1] + else + @build_context.add( + Text.new( event[1], @build_context.whitespace, nil, true ) + ) unless ( + event[1].strip.size==0 and + @build_context.ignore_whitespace_nodes + ) + end + end + when :comment + c = Comment.new( event[1] ) + @build_context.add( c ) + when :cdata + c = CData.new( event[1] ) + @build_context.add( c ) + when :processing_instruction + @build_context.add( Instruction.new( event[1], event[2] ) ) + when :end_doctype + in_doctype = false + entities.each { |k,v| entities[k] = @build_context.entities[k].value } + @build_context = @build_context.parent + when :start_doctype + doctype = DocType.new( event[1..-1], @build_context ) + @build_context = doctype + entities = {} + in_doctype = true + when :attlistdecl + n = AttlistDecl.new( event[1..-1] ) + @build_context.add( n ) + when :externalentity + n = ExternalEntity.new( event[1] ) + @build_context.add( n ) + when :elementdecl + n = ElementDecl.new( event[1] ) + @build_context.add(n) + when :entitydecl + entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/ + @build_context.add(Entity.new(event)) + when :notationdecl + n = NotationDecl.new( *event[1..-1] ) + @build_context.add( n ) + when :xmldecl + x = XMLDecl.new( event[1], event[2], event[3] ) + @build_context.add( x ) + end + end + rescue REXML::Validation::ValidationException + raise + rescue + raise ParseException.new( $!.message, @parser.source, @parser, $! ) + end + end + end + end +end diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 40995d7166..5bdc0bb867 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -23,4 +23,5 @@ module REXML Copyright = "Copyright © 2001, 2002, 2003, 2004 Sean Russell " Date = "+2004/162" Version = "3.1.1" + end