2003-06-09 21:31:01 -04:00
|
|
|
require 'rexml/parseexception'
|
|
|
|
require 'rexml/source'
|
|
|
|
|
|
|
|
module REXML
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
module Parsers
|
|
|
|
# = Using the Pull Parser
|
|
|
|
# <em>This API is experimental, and subject to change.</em>
|
|
|
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
|
|
|
# while parser.has_next?
|
|
|
|
# res = parser.next
|
|
|
|
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
|
|
|
# end
|
|
|
|
# See the PullEvent class for information on the content of the results.
|
|
|
|
# The data is identical to the arguments passed for the various events to
|
|
|
|
# the StreamListener API.
|
|
|
|
#
|
|
|
|
# Notice that:
|
|
|
|
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
|
|
|
# while parser.has_next?
|
|
|
|
# res = parser.next
|
|
|
|
# raise res[1] if res.error?
|
|
|
|
# end
|
|
|
|
#
|
|
|
|
# Nat Price gave me some good ideas for the API.
|
|
|
|
class BaseParser
|
|
|
|
NCNAME_STR= '[\w:][\-\w\d.]*'
|
|
|
|
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
NAMECHAR = '[\-\w\d\.:]'
|
|
|
|
NAME = "([\\w:]#{NAMECHAR}*)"
|
|
|
|
NMTOKEN = "(?:#{NAMECHAR})+"
|
|
|
|
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
|
|
|
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
|
|
|
|
REFERENCE_RE = /#{REFERENCE}/
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
|
|
|
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
|
|
|
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
|
|
|
COMMENT_START = /\A<!--/u
|
|
|
|
COMMENT_PATTERN = /<!--(.*?)-->/um
|
|
|
|
CDATA_START = /\A<!\[CDATA\[/u
|
|
|
|
CDATA_END = /^\s*\]\s*>/um
|
|
|
|
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
|
|
|
|
# Matches the opening of an XML declaration ("<?xml" followed by one
# whitespace character) anchored at the very start of the string.
XMLDECL_START = %r{\A<\?xml\s}u
|
|
|
|
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
|
|
|
INSTRUCTION_START = /\A<\?/u
|
|
|
|
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
|
|
|
|
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
|
|
|
|
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
2006-10-09 10:00:59 -04:00
|
|
|
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
|
|
|
|
# Captures the value of the `standalone` pseudo-attribute of an XML
# declaration in group 1. Whitespace on either side of '=' is optional
# (zero or more characters), so both standalone="yes" and
# standalone = 'yes' are recognised. Previously exactly one whitespace
# character was required after '=', which rejected the common no-space form.
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
ENTITY_START = /^\s*<!ENTITY/
|
Merges upstream changes for REXML v3.1.7
http://www.germane-software.com/repos/rexml/tags/3.1.7
r1278@bean: ser | 2007-06-07 00:53:06 -0400
Fixed a double-encoding bug. This was a regression, related
to ticket:48.
r1292@bean: ser | 2007-07-25 08:19:36 -0400
r1279@bean: ser | 2007-06-09 23:19:02 -0400
Fixes ticket:89 -- encoding CP-1252 was broken. ISO-8859-15 had the same
problem.
Also in this patch is a fix to merge.rb (unused, but it should at least
contain no errors), and a unit test for ticket:88.
r1293@bean: ser | 2007-07-25 08:19:37 -0400
r1281@bean: ser | 2007-07-24 11:08:48 -0400
Addresses ticket:85
This is a major rewrite of the XML formatting code. The XML writers have all
been extracted out of the classes and put into their own class containers.
This makes writing parsers easier, and cleaner.
There are three formatters, which correspond to the previous three XML writing
modes:
REXML::Formatters::Default
Prints the XML document exactly as it was parsed
REXML::Formatters::Pretty
Pretty prints the XML document, destroying whitespace in the document
REXML::Formatters::Transitive
Pretty prints the XML document, preserving whitespace
All of the write() functions have been deprecated (some are still used, but
these will also go away) except the write() function on Document, which is left
for convenience. To pretty print an XML document the canonical way:
formatter = REXML::Formatters::Pretty.new( 5 ) # indent by 5 spaces
formatter.write( document, output )
r1294@bean: ser | 2007-07-25 08:19:38 -0400
r1283@bean: ser | 2007-07-24 19:53:30 -0400
This goes with the previous commit.
r1295@bean: ser | 2007-07-25 08:19:39 -0400
r1285@bean: ser | 2007-07-24 20:02:07 -0400
And THIS goes with the previous two patches. Dammit.
r1296@bean: ser | 2007-07-25 08:19:40 -0400
r1287@bean: ser | 2007-07-24 20:12:25 -0400
Applied patch from Jeff Barczewski. Note that this changes what the values of
the name and IDs are from the previous behavior -- the values no longer include
the quotes. This is the correct behavior, so I'm leaving it in, but it is not
backwards compatible. Also fixes the serializer so that it outputs the doctype
in a correct format (needed as a result of this change).
r1297@bean: ser | 2007-07-25 08:38:38 -0400
Version update
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12844 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-25 08:47:23 -04:00
|
|
|
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
ELEMENTDECL_START = /^\s*<!ELEMENT/um
|
|
|
|
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
|
|
|
|
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
|
|
|
|
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
|
|
|
|
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
|
|
|
|
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
|
|
|
|
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
|
|
|
|
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
|
|
|
|
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
|
|
|
|
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
|
|
|
|
ATTDEF_RE = /#{ATTDEF}/
|
|
|
|
ATTLISTDECL_START = /^\s*<!ATTLIST/um
|
|
|
|
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
|
|
NOTATIONDECL_START = /^\s*<!NOTATION/um
|
|
|
|
PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
|
|
|
|
SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
TEXT_PATTERN = /\A([^<]*)/um
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Entity constants
|
|
|
|
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
|
|
|
|
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
|
|
|
|
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
|
|
|
|
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
|
|
|
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
|
|
|
PEREFERENCE = "%#{NAME};"
|
|
|
|
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
|
|
|
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
|
|
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
|
|
|
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
|
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
|
|
# Matches an entity declaration: either a general entity (GEDECL) or a
# parameter entity (PEDECL).
# NOTE(review): the `|` is at the top level of the pattern, so the leading
# `\s*` is part of the GEDECL alternative only; the PEDECL alternative has
# no leading-whitespace allowance. Confirm whether that asymmetry is intended
# before regrouping, since doing so would change match offsets.
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Matches a "&" that is not immediately followed by NAME and ";". The
# check is a negative lookahead, so only the ampersand itself is consumed.
# NAME is defined elsewhere in this file.
EREFERENCE = /&(?!#{NAME};)/
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Table of predefined entities, keyed by entity name ("amp" is not part of
# this table). Each value is a four-element array:
#   [0] pattern matching the entity reference, e.g. /&gt;/
#   [1] the entity reference as text,          e.g. '&gt;'
#   [2] the literal character it stands for,   e.g. '>'
#   [3] pattern matching that literal character
# The first two slots must hold the escaped "&name;" form; if they hold the
# bare character they merely duplicate slots [3] and [2].
DEFAULT_ENTITIES = {
  'gt' => [/&gt;/, '&gt;', '>', />/],
  'lt' => [/&lt;/, '&lt;', '<', /</],
  'quot' => [/&quot;/, '&quot;', '"', /"/],
  "apos" => [/&apos;/, "&apos;", "'", /'/]
}
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2006-09-07 22:03:44 -04:00
|
|
|
|
|
|
|
######################################################################
# These are patterns to identify common markup errors, to make the
# error messages more informative.
######################################################################

# Matches, at the start of a line, "<", a name, whitespace, a second name,
# "=" (optionally surrounded by whitespace) and then a character that is
# neither a double nor a single quote -- i.e. an attribute whose value is
# not quoted. NAME_STR is defined elsewhere in this file.
MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
|
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Creates a parser that reads from +source+. All state set-up is delegated
# to #stream=, so assigning a new stream later resets the parser the same
# way construction does.
def initialize(source)
  self.stream = source
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
2004-06-09 22:01:04 -04:00
|
|
|
# Registers +listener+, an object responding to #receive(event).
#
# The first time a listener is added (i.e. while @listeners is undefined or
# nil) the current #pull is aliased to #_old_pull and replaced, through
# instance_eval on this object, by a wrapper that hands every pulled event
# to each registered listener before returning it. Later calls only append
# to the listener list, so #pull is wrapped exactly once.
def add_listener(listener)
  unless defined?(@listeners) && @listeners
    @listeners = []
    instance_eval <<-EOL
      alias :_old_pull :pull
      def pull
        event = _old_pull
        @listeners.each do |listener|
          listener.receive event
        end
        event
      end
    EOL
  end
  @listeners << listener
end
|
|
|
|
|
2004-05-16 11:17:31 -04:00
|
|
|
# Read-only accessor for the @source instance variable.
attr_reader :source
|
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Points the parser at a new input and resets all parsing state.
#
# +source+ is handed to SourceFactory.create_from and the result is stored
# in @source; the closed marker and document status are cleared and the
# tag, event-stack and entity lists are emptied.
def stream=(source)
  @source = SourceFactory.create_from(source)
  @closed = nil
  @document_status = nil
  @tags = []
  @stack = []
  @entities = []
end
|
|
|
|
|
|
|
|
# Returns the current position as reported by the source, or 0 when the
# source object does not respond to #position.
def position
  if @source.respond_to? :position
    @source.position
  else
    # FIXME
    0
  end
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Returns true if there are no more events, i.e. both the source and the
# stack of pushed-back events are empty.
def empty?
  @source.empty? && @stack.empty?
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Returns true if there are more events. Synonymous with !empty?: it is
# false only when both the source and the pushed-back event stack are empty.
def has_next?
  !(@source.empty? && @stack.empty?)
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Push an event back on the head of the stream. This method
# has (theoretically) infinite depth.
#
# +token+ is inserted at the front of @stack; the return value is whatever
# @stack.unshift returns.
def unshift(token)
  @stack.unshift(token)
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Peek at the +depth+ event in the stack. The first element on the stack
# is at depth 0. If +depth+ is -1, will parse to the end of the input
# stream and return the last event, which is always :end_document.
# Be aware that this causes the stream to be parsed up to the +depth+
# event, so you can effectively pre-parse the entire document (pull the
# entire thing into memory) using this method.
#
# Raises a RuntimeError if +depth+ is less than -1.
def peek(depth = 0)
  raise %Q[Illegal argument "#{depth}"] if depth < -1

  # Events pulled here are collected separately and appended afterwards,
  # because #pull itself consumes from the front of @stack.
  temp = []
  if depth == -1
    temp.push(pull()) until empty?
  else
    while @stack.size + temp.size < depth + 1
      temp.push(pull())
    end
  end
  @stack += temp if temp.size > 0
  @stack[depth]
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Returns the next event. This is a +PullEvent+ object.
|
|
|
|
def pull
|
|
|
|
if @closed
|
|
|
|
x, @closed = @closed, nil
|
|
|
|
return [ :end_element, x ]
|
|
|
|
end
|
|
|
|
return [ :end_document ] if empty?
|
|
|
|
return @stack.shift if @stack.size > 0
|
|
|
|
@source.read if @source.buffer.size<2
|
|
|
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
|
|
|
if @document_status == nil
|
|
|
|
#@source.consume( /^\s*/um )
|
|
|
|
word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
|
|
|
|
word = word[1] unless word.nil?
|
|
|
|
#STDERR.puts "WORD = #{word.inspect}"
|
|
|
|
case word
|
|
|
|
when COMMENT_START
|
|
|
|
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
|
|
|
|
when XMLDECL_START
|
|
|
|
#STDERR.puts "XMLDECL"
|
|
|
|
results = @source.match( XMLDECL_PATTERN, true )[1]
|
|
|
|
version = VERSION.match( results )
|
|
|
|
version = version[1] unless version.nil?
|
|
|
|
encoding = ENCODING.match(results)
|
|
|
|
encoding = encoding[1] unless encoding.nil?
|
|
|
|
@source.encoding = encoding
|
|
|
|
standalone = STANDALONE.match(results)
|
|
|
|
standalone = standalone[1] unless standalone.nil?
|
|
|
|
return [ :xmldecl, version, encoding, standalone ]
|
|
|
|
when INSTRUCTION_START
|
|
|
|
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
|
|
|
|
when DOCTYPE_START
|
|
|
|
md = @source.match( DOCTYPE_PATTERN, true )
|
|
|
|
identity = md[1]
|
|
|
|
close = md[2]
|
|
|
|
identity =~ IDENTITY
|
|
|
|
name = $1
|
Merges upstream changes for REXML v3.1.7
http://www.germane-software.com/repos/rexml/tags/3.1.7
r1278@bean: ser | 2007-06-07 00:53:06 -0400
Fixed a double-encoding bug. This was a regression, related
to ticket:48.
r1292@bean: ser | 2007-07-25 08:19:36 -0400
r1279@bean: ser | 2007-06-09 23:19:02 -0400
Fixes ticket:89 -- encoding CP-1252 was broken. ISO-8859-15 had the same
problem.
Also in this patch is a fix to merge.rb (unused, but it should at least
contain no errors), and a unit test for ticket:88.
r1293@bean: ser | 2007-07-25 08:19:37 -0400
r1281@bean: ser | 2007-07-24 11:08:48 -0400
Addresses ticket:85
This is a major rewrite of the XML formatting code. The XML writers have all
been extracted out of the classes and put into their own class containers.
This makes writing parsers easier, and cleaner.
There are three formatters, which correspond to the previous three XML writing
modes:
REXML::Formatters::Default
Prints the XML document exactly as it was parsed
REXML::Formatters::Pretty
Pretty prints the XML document, destroying whitespace in the document
REXML::Formatters::Transitive
Pretty prints the XML document, preserving whitespace
All of the write() functions have been deprecated (some are still used, but
these will also go away) except the write() function on Document, which is left
for convenience. To pretty print an XML document the canonical way:
formatter = REXML::Formatters::Pretty.new( 5 ) # indent by 5 spaces
formatter.write( document, output )
r1294@bean: ser | 2007-07-25 08:19:38 -0400
r1283@bean: ser | 2007-07-24 19:53:30 -0400
This goes with the previous commit.
r1295@bean: ser | 2007-07-25 08:19:39 -0400
r1285@bean: ser | 2007-07-24 20:02:07 -0400
And THIS goes with the previous two patches. Dammit.
r1296@bean: ser | 2007-07-25 08:19:40 -0400
r1287@bean: ser | 2007-07-24 20:12:25 -0400
Applied patch from Jeff Barczewski. Note that this changes what the values of
the name and IDs are from the previous behavior -- the values no longer include
the quotes. This is the correct behavior, so I'm leaving it in, but it is not
backwards compatible. Also fixes the serializer so that it outputs the doctype
in a correct format (needed as a result of this change).
r1297@bean: ser | 2007-07-25 08:38:38 -0400
Version update
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12844 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-25 08:47:23 -04:00
|
|
|
raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
pub_sys = $2.nil? ? nil : $2.strip
|
Merges upstream changes for REXML v3.1.7
http://www.germane-software.com/repos/rexml/tags/3.1.7
r1278@bean: ser | 2007-06-07 00:53:06 -0400
Fixed a double-encoding bug. This was a regression, related
to ticket:48.
r1292@bean: ser | 2007-07-25 08:19:36 -0400
r1279@bean: ser | 2007-06-09 23:19:02 -0400
Fixes ticket:89 -- encoding CP-1252 was broken. ISO-8859-15 had the same
problem.
Also in this patch is a fix to merge.rb (unused, but it should at least
contain no errors), and a unit test for ticket:88.
r1293@bean: ser | 2007-07-25 08:19:37 -0400
r1281@bean: ser | 2007-07-24 11:08:48 -0400
Addresses ticket:85
This is a major rewrite of the XML formatting code. The XML writers have all
been extracted out of the classes and put into their own class containers.
This makes writing parsers easier, and cleaner.
There are three formatters, which correspond to the previous three XML writing
modes:
REXML::Formatters::Default
Prints the XML document exactly as it was parsed
REXML::Formatters::Pretty
Pretty prints the XML document, destroying whitespace in the document
REXML::Formatters::Transitive
Pretty prints the XML document, preserving whitespace
All of the write() functions have been deprecated (some are still used, but
these will also go away) except the write() function on Document, which is left
for convenience. To pretty print an XML document the canonical way:
formatter = REXML::Formatters::Pretty.new( 5 ) # indent by 5 spaces
formatter.write( document, output )
r1294@bean: ser | 2007-07-25 08:19:38 -0400
r1283@bean: ser | 2007-07-24 19:53:30 -0400
This goes with the previous commit.
r1295@bean: ser | 2007-07-25 08:19:39 -0400
r1285@bean: ser | 2007-07-24 20:02:07 -0400
And THIS goes with the previous two patches. Dammit.
r1296@bean: ser | 2007-07-25 08:19:40 -0400
r1287@bean: ser | 2007-07-24 20:12:25 -0400
Applied patch from Jeff Barczewski. Note that this changes what the values of
the name and IDs are from the previous behavior -- the values no longer include
the quotes. This is the correct behavior, so I'm leaving it in, but it is not
backwards compatible. Also fixes the serializer so that it outputs the doctype
in a correct format (needed as a result of this change).
r1297@bean: ser | 2007-07-25 08:38:38 -0400
Version update
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12844 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-25 08:47:23 -04:00
|
|
|
long_name = $4.nil? ? nil : $4.strip
|
|
|
|
uri = $6.nil? ? nil : $6.strip
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
args = [ :start_doctype, name, pub_sys, long_name, uri ]
|
|
|
|
if close == ">"
|
|
|
|
@document_status = :after_doctype
|
|
|
|
@source.read if @source.buffer.size<2
|
|
|
|
md = @source.match(/^\s*/um, true)
|
|
|
|
@stack << [ :end_doctype ]
|
|
|
|
else
|
|
|
|
@document_status = :in_doctype
|
|
|
|
end
|
|
|
|
return args
|
|
|
|
when /^\s+/
|
|
|
|
else
|
|
|
|
@document_status = :after_doctype
|
|
|
|
@source.read if @source.buffer.size<2
|
|
|
|
md = @source.match(/\s*/um, true)
|
|
|
|
end
|
|
|
|
end
|
|
|
|
if @document_status == :in_doctype
|
|
|
|
md = @source.match(/\s*(.*?>)/um)
|
|
|
|
case md[1]
|
|
|
|
when SYSTEMENTITY
|
|
|
|
match = @source.match( SYSTEMENTITY, true )[1]
|
|
|
|
return [ :externalentity, match ]
|
2003-12-08 21:41:33 -05:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
when ELEMENTDECL_START
|
|
|
|
return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
|
2003-12-08 21:41:33 -05:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
when ENTITY_START
|
|
|
|
match = @source.match( ENTITYDECL, true ).to_a.compact
|
|
|
|
match[0] = :entitydecl
|
|
|
|
ref = false
|
|
|
|
if match[1] == '%'
|
|
|
|
ref = true
|
|
|
|
match.delete_at 1
|
|
|
|
end
|
|
|
|
# Now we have to sort out what kind of entity reference this is
|
|
|
|
if match[2] == 'SYSTEM'
|
|
|
|
# External reference
|
|
|
|
match[3] = match[3][1..-2] # PUBID
|
|
|
|
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
|
|
|
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
|
|
|
elsif match[2] == 'PUBLIC'
|
|
|
|
# External reference
|
|
|
|
match[3] = match[3][1..-2] # PUBID
|
|
|
|
match[4] = match[4][1..-2] # HREF
|
|
|
|
# match is [ :entity, name, PUBLIC, pubid, href ]
|
|
|
|
else
|
|
|
|
match[2] = match[2][1..-2]
|
|
|
|
match.pop if match.size == 4
|
|
|
|
# match is [ :entity, name, value ]
|
|
|
|
end
|
|
|
|
match << '%' if ref
|
|
|
|
return match
|
|
|
|
when ATTLISTDECL_START
|
|
|
|
md = @source.match( ATTLISTDECL_PATTERN, true )
|
|
|
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
|
|
|
element = md[1]
|
|
|
|
contents = md[0]
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
pairs = {}
|
|
|
|
values = md[0].scan( ATTDEF_RE )
|
|
|
|
values.each do |attdef|
|
|
|
|
unless attdef[3] == "#IMPLIED"
|
|
|
|
attdef.compact!
|
|
|
|
val = attdef[3]
|
|
|
|
val = attdef[4] if val == "#FIXED "
|
|
|
|
pairs[attdef[0]] = val
|
|
|
|
end
|
|
|
|
end
|
|
|
|
return [ :attlistdecl, element, pairs, contents ]
|
|
|
|
when NOTATIONDECL_START
|
|
|
|
md = nil
|
|
|
|
if @source.match( PUBLIC )
|
|
|
|
md = @source.match( PUBLIC, true )
|
|
|
|
vals = [md[1],md[2],md[4],md[6]]
|
|
|
|
elsif @source.match( SYSTEM )
|
|
|
|
md = @source.match( SYSTEM, true )
|
|
|
|
vals = [md[1],md[2],nil,md[4]]
|
|
|
|
else
|
|
|
|
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
|
|
|
|
end
|
|
|
|
return [ :notationdecl, *vals ]
|
|
|
|
when CDATA_END
|
|
|
|
@document_status = :after_doctype
|
|
|
|
@source.match( CDATA_END, true )
|
|
|
|
return [ :end_doctype ]
|
|
|
|
end
|
|
|
|
end
|
|
|
|
begin
|
|
|
|
if @source.buffer[0] == ?<
|
|
|
|
if @source.buffer[1] == ?/
|
|
|
|
last_tag = @tags.pop
|
|
|
|
#md = @source.match_to_consume( '>', CLOSE_MATCH)
|
|
|
|
md = @source.match( CLOSE_MATCH, true )
|
|
|
|
raise REXML::ParseException.new( "Missing end tag for "+
|
r1002 | ser | 2004-06-07 07:45:53 -0400 (Mon, 07 Jun 2004) | 2 lines
* Workin' in the coal mine, goin' down, down, down...
r1003 | ser | 2004-06-08 22:24:08 -0400 (Tue, 08 Jun 2004) | 7 lines
* Entirely rewrote the validation code; the finite state machine, while cool,
didn't survive the encounter with Interleave. It was getting sort of hacky,
too. The new mechanism is less elegant, but is basically still a FSM, and is
more flexible without having to add hacks to extend it. Large chunks of the
FSM may be reusable in other validation mechanisms.
* Added interleave support
r1004 | ser | 2004-06-09 07:24:17 -0400 (Wed, 09 Jun 2004) | 2 lines
* Added support for mixed
r1005 | ser | 2004-06-09 08:01:33 -0400 (Wed, 09 Jun 2004) | 3 lines
* Added Kou's patch to normalize attribute values passed through the SAX2 and
Stream parsers.
r1006 | ser | 2004-06-09 08:12:35 -0400 (Wed, 09 Jun 2004) | 2 lines
* Applied Kou's preceding-sibling patch, which fixes the order of the axe results
r1009 | ser | 2004-06-20 11:02:55 -0400 (Sun, 20 Jun 2004) | 8 lines
* Redesigned and rewrote the RelaxNG code. It isn't elegant, but it works.
Particular problems encountered were interleave and ref. Interleave means I
can't use a clean FSM design, and ref means the dirty FSM design has to be modified
during validation. There's a lot of code that could be cleaned up in here.
However, I'm pretty sure that this design is reasonably fast and space efficient.
I'm not entirely convinced that it is correct; more tests are required.
* This version adds support for defines and refs.
r1011 | ser | 2004-06-20 11:20:07 -0400 (Sun, 20 Jun 2004) | 3 lines
* Removed debugging output from unit test
* Moved ">" in Element.inspect
r1014 | ser | 2004-06-20 11:40:30 -0400 (Sun, 20 Jun 2004) | 2 lines
* Minor bug in missing includes for validation rules
r1023 | ser | 2004-07-03 08:57:34 -0400 (Sat, 03 Jul 2004) | 2 lines
* Fixed bug #34, typo in xpath_parser.
r1024 | ser | 2004-07-03 10:22:08 -0400 (Sat, 03 Jul 2004) | 9 lines
* Previous fix, (include? -> includes?) was incorrect.
* Added another test for encoding
* Started AnyName support in RelaxNG
* Added Element#Attributes#to_a, so that it does something intelligent.
This was needed by XPath, for '@*'
* Fixed XPath so that @* works.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6577 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2004-07-04 11:26:07 -04:00
|
|
|
"'#{last_tag}' (got \"#{md[1]}\")",
|
|
|
|
@source) unless last_tag == md[1]
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http://www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous donor to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatibility.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a broken link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
return [ :end_element, last_tag ]
|
|
|
|
elsif @source.buffer[1] == ?!
|
|
|
|
md = @source.match(/\A(\s*[^>]*>)/um)
|
|
|
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
|
|
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
|
|
|
if md[0][2] == ?-
|
|
|
|
md = @source.match( COMMENT_PATTERN, true )
|
|
|
|
return [ :comment, md[1] ] if md
|
|
|
|
else
|
|
|
|
md = @source.match( CDATA_PATTERN, true )
|
|
|
|
return [ :cdata, md[1] ] if md
|
|
|
|
end
|
|
|
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
|
|
|
"in the doctype declaration.", @source)
|
|
|
|
elsif @source.buffer[1] == ??
|
|
|
|
md = @source.match( INSTRUCTION_PATTERN, true )
|
|
|
|
return [ :processing_instruction, md[1], md[2] ] if md
|
|
|
|
raise REXML::ParseException.new( "Bad instruction declaration",
|
|
|
|
@source)
|
|
|
|
else
|
|
|
|
# Get the next tag
|
|
|
|
md = @source.match(TAG_MATCH, true)
|
2006-09-07 22:03:44 -04:00
|
|
|
unless md
|
|
|
|
# Check for missing attribute quotes
|
|
|
|
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
|
|
|
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
|
|
|
end
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
attrs = []
|
|
|
|
if md[2].size > 0
|
|
|
|
attrs = md[2].scan( ATTRIBUTE_PATTERN )
|
|
|
|
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
|
|
|
|
end
|
|
|
|
|
|
|
|
if md[4]
|
|
|
|
@closed = md[1]
|
|
|
|
else
|
|
|
|
@tags.push( md[1] )
|
|
|
|
end
|
|
|
|
attributes = {}
|
|
|
|
attrs.each { |a,b,c| attributes[a] = c }
|
|
|
|
return [ :start_element, md[1], attributes ]
|
|
|
|
end
|
|
|
|
else
|
|
|
|
md = @source.match( TEXT_PATTERN, true )
|
2004-02-13 17:40:14 -05:00
|
|
|
if md[0].length == 0
|
|
|
|
@source.match( /(\s+)/, true )
|
|
|
|
end
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
#STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
|
2004-02-13 17:40:14 -05:00
|
|
|
#return [ :text, "" ] if md[0].length == 0
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# unnormalized = Text::unnormalize( md[1], self )
|
|
|
|
# return PullEvent.new( :text, md[1], unnormalized )
|
|
|
|
return [ :text, md[1] ]
|
|
|
|
end
|
|
|
|
rescue REXML::ParseException
|
|
|
|
raise
|
|
|
|
rescue Exception, NameError => error
|
|
|
|
raise REXML::ParseException.new( "Exception parsing",
|
|
|
|
@source, self, (error ? error : $!) )
|
|
|
|
end
|
|
|
|
return [ :dummy ]
|
|
|
|
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Looks up the replacement text for the entity named +reference+.
#
# The caller-supplied +entities+ table (anything answering #[]; may be nil)
# is consulted first. When it yields nothing, DEFAULT_ENTITIES is consulted
# and the element at index 2 of its entry is used as the value.
#
# Returns the value after passing it through #unnormalize with the same
# +entities+ table, or nil when neither table knows the name.
def entity( reference, entities )
  value = entities[ reference ] if entities
  unless value
    default = DEFAULT_ENTITIES[ reference ]
    value = default[2] if default
  end
  # The stored value may itself contain references, so expand it as well.
  unnormalize( value, entities ) if value
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Escapes all possible entities.
#
# input::         the text to escape; it is not modified, a copy is returned
# entities::      optional table of entity name => literal value; every
#                 occurrence of a value is replaced by "&name;"
# entity_filter:: optional collection of entity names that must NOT be
#                 substituted from +entities+
#
# Ampersands matched by EREFERENCE are escaped to "&amp;" both before and
# after the custom-entity pass, and finally each DEFAULT_ENTITIES entry is
# applied (pattern at index 3, replacement at index 1).
def normalize( input, entities=nil, entity_filter=nil )
  # dup (not clone) so that a frozen input still yields an editable copy
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( EREFERENCE, '&amp;' )
  if entities
    entities.each do |key, value|
      # Filter on the entity's name; the bare word `entity` here used to
      # invoke the two-argument #entity method and raise ArgumentError.
      next if entity_filter && entity_filter.include?(key)
      copy.gsub!( value, "&#{key};" )
    end
  end
  copy.gsub!( EREFERENCE, '&amp;' )
  DEFAULT_ENTITIES.each do |_name, spec|
    copy.gsub!( spec[3], spec[1] )
  end
  copy
end
|
2003-06-09 21:31:01 -04:00
|
|
|
|
Short summary:
This is a version bump to REXML 3.1.4 for Ruby HEAD. This change log is
identical to the log for the 1.8 branch.
It includes numerous bug fixes and is a pretty big patch, but is nonetheless
a minor revision bump, since the API hasn't changed.
For more information, see:
http:/www.germane-software.com/projects/rexml/milestone/3.1.4
For all tickets, see:
http://www.germane-software.com/projects/rexml/ticket/#
Where '#' is replaced with the ticket number.
Changelog:
* Fixed the documentation WRT the raw mode of text nodes (ticket #4)
* Fixes roundup ticket #43: substring-after bug.
* Fixed ticket #44, Element#xpath
* Patch submitted by an anonymous doner to allow parsing of Tempfiles. I was
hoping that, by now, that whole Source thing would have been changed to use
duck typing and avoid this sort of ticket... but in the meantime, the patch
has been applied.
* Fixes ticket:30, XPath default namespace bug. The fix was provided
by Lucas Nussbaum.
* Aliases #size to #length, as per zdennis's request.
* Fixes typo from previous commit
* Fixes ticket #32, preceding-sibling fails attempting delete_if on nil nodeset
* Merges a user-contributed patch for ticket #40
* Adds a forgotten-to-commit unit test for ticket #32
* Changes Date, Version, and Copyright to upper case, to avoid conflicts with
the Date class. All of the other changes in the altered files are because
Subversion doesn't allow block-level commits, like it should. English cased
Version and Copyright are aliased to the upper case versions, for partial
backward compatability.
* Resolves ticket #34, SAX parser change makes it impossible to parse IO feeds.
* Moves parser.source.position() to parser.position()
* Fixes ticket:48, repeated writes munging text content
* Fixes ticket:46, adding methods for accessing notation DTD information.
* Encodes some characters and removes a brokes link in the documentation
* Deals with carriage returns after XML declarations
* Improved doctype handling
* Whitespace handling changes
* Applies a patch by David Tardon, which (incidentally) fixes ticket:50
* Closes #26, allowing anything that walks like an IO to be a source.
* Ticket #31 - One unescape too many
This wasn't really a bug, per se... "value" always returns
a normalized string, and "value" is the method used to get
the text() of an element. However, entities have no meaning
in CDATA sections, so there's no justification for value
to be normalizing the content of CData objects. This behavior
has therefore been changed.
* Ticket #45 -- Now parses notation declarations in DTDs properly.
* Resolves ticket #49, Document.parse_stream returns ArgumentError
* Adds documentation to clarify how XMLDecl works, to avoid invalid bug reports.
* Addresses ticket #10, fixing the StreamParser API for DTDs.
* Fixes ticket #42, XPath node-set function 'name' fails with relative node
set parameter
* Good patch by Aaron to fix ticket #53: REXML ignoring unbalanced tags
at the end of a document.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10092 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-04-15 00:11:04 -04:00
|
|
|
# Unescapes all possible entities.
#
# string::   the text to expand; it is not modified, a copy is returned
# entities:: optional table of entity name => value, handed to #entity
# filter::   optional collection of entity names to leave unexpanded
#
# Steps, in order:
# 1. "\r\n" and lone "\r" become "\n".
# 2. If REFERENCE_RE finds nothing, the copy is returned immediately.
# 3. Numeric character references (decimal "&#NN;" or hex "&#xHH;") are
#    replaced by the UTF-8 encoding of that code point.
# 4. Each named reference captured by REFERENCE_RE is replaced by the value
#    from #entity, then by the DEFAULT_ENTITIES entry (pattern at index 0,
#    replacement at index 2), skipping names listed in +filter+.
# 5. "&amp;" becomes "&" last.
def unnormalize( string, entities=nil, filter=nil )
  # dup (not clone) so that a frozen input still yields an editable copy
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE_RE )
  return rv if matches.empty?

  # Leading zeros are consumed by the pattern so a decimal such as "&#010;"
  # is not read as octal by Integer(); hex digits get a "0" prefix to form
  # the "0x.." literal Integer() understands.
  rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
    code = $1
    code = "0#{code}" if code[0] == ?x
    [Integer(code)].pack('U*')
  }

  # Keep only the first capture of each match (the entity name); matches
  # without one (e.g. numeric references) yield nil and are dropped.
  matches.collect!{|x|x[0]}.compact!
  return rv if matches.empty?

  matches.each do |entity_reference|
    next if filter && filter.include?(entity_reference)
    entity_value = entity( entity_reference, entities )
    rv.gsub!( /&#{entity_reference};/, entity_value ) if entity_value
  end
  matches.each do |entity_reference|
    next if filter && filter.include?(entity_reference)
    er = DEFAULT_ENTITIES[entity_reference]
    rv.gsub!( er[0], er[2] ) if er
  end
  # Ampersands are restored last, after all other references are expanded.
  rv.gsub!( /&amp;/, '&' )
  rv
end
|
|
|
|
end
|
|
|
|
end
|
2003-06-09 21:31:01 -04:00
|
|
|
end
|
2004-06-09 22:01:04 -04:00
|
|
|
|
|
|
|
=begin
|
|
|
|
case event[0]
|
|
|
|
when :start_element
|
|
|
|
when :text
|
|
|
|
when :end_element
|
|
|
|
when :processing_instruction
|
|
|
|
when :cdata
|
|
|
|
when :comment
|
|
|
|
when :xmldecl
|
|
|
|
when :start_doctype
|
|
|
|
when :end_doctype
|
|
|
|
when :externalentity
|
|
|
|
when :elementdecl
|
|
|
|
when :entity
|
|
|
|
when :attlistdecl
|
|
|
|
when :notationdecl
|
|
|
|
when :end_doctype
|
|
|
|
end
|
|
|
|
=end
|