* lib/rss/parser.rb: added entity handling type predicate.

* lib/rss/rexmlparser.rb: ditto.
* lib/rss/xmlparser.rb: ditto.
* lib/rss/xmlscanner.rb: ditto.

* lib/rss/xmlscanner.rb: more robust entity handling.

* test/rss/test_parser.rb: added an entity handling test.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9602 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
kou 2005-11-23 13:35:11 +00:00
parent 4b87fa9de2
commit 93402a2f17
6 changed files with 80 additions and 19 deletions

View File

@ -1,3 +1,14 @@
Wed Nov 23 22:34:15 2005 Kouhei Sutou <kou@cozmixng.org>
* lib/rss/parser.rb: added entity handling type predicate.
* lib/rss/rexmlparser.rb: ditto.
* lib/rss/xmlparser.rb: ditto.
* lib/rss/xmlscanner.rb: ditto.
* lib/rss/xmlscanner.rb: more robust entity handling.
* test/rss/test_parser.rb: added an entity handling test.
Wed Nov 23 20:59:01 2005 Hidetoshi NAGAI <nagai@ai.kyutech.ac.jp>
* ext/tk/lib/tk.rb: add Tk.pkgconfig_list and Tk.pkgconfig_get

View File

@ -69,7 +69,6 @@ module RSS
parser.ignore_unknown_element = ignore_unknown_element parser.ignore_unknown_element = ignore_unknown_element
parser.parse parser.parse
end end
end end
def_delegators(:@parser, :parse, :rss, def_delegators(:@parser, :parse, :rss,
@ -113,8 +112,14 @@ module RSS
class BaseParser class BaseParser
class << self
def raise_for_undefined_entity?
listener.raise_for_undefined_entity?
end
end
def initialize(rss) def initialize(rss)
@listener = listener.new @listener = self.class.listener.new
@rss = rss @rss = rss
end end
@ -205,6 +210,10 @@ module RSS
def_get_text_element(uri, name, *get_file_and_line_from_caller(1)) def_get_text_element(uri, name, *get_file_and_line_from_caller(1))
end end
def raise_for_undefined_entity?
true
end
private private
def def_get_text_element(uri, name, file, line) def def_get_text_element(uri, name, file, line)

View File

@ -10,12 +10,13 @@ module RSS
class REXMLParser < BaseParser class REXMLParser < BaseParser
private class << self
def listener
def listener REXMLListener
REXMLListener end
end end
private
def _parse def _parse
begin begin
REXML::Document.parse_stream(@rss, @listener) REXML::Document.parse_stream(@rss, @listener)
@ -35,6 +36,12 @@ module RSS
include REXML::StreamListener include REXML::StreamListener
include ListenerMixin include ListenerMixin
class << self
def raise_for_undefined_entity?
false
end
end
def xmldecl(version, encoding, standalone) def xmldecl(version, encoding, standalone)
super(version, encoding, standalone == "yes") super(version, encoding, standalone == "yes")
# Encoding is converted to UTF-8 when REXML parse XML. # Encoding is converted to UTF-8 when REXML parse XML.

View File

@ -59,11 +59,13 @@ module RSS
class XMLParserParser < BaseParser class XMLParserParser < BaseParser
private class << self
def listener def listener
XMLParserListener XMLParserListener
end
end end
private
def _parse def _parse
begin begin
parser = REXMLLikeXMLParser.new parser = REXMLLikeXMLParser.new

View File

@ -1,19 +1,29 @@
require 'xmlscan/scanner' require 'xmlscan/scanner'
require 'stringio'
module RSS module RSS
class XMLScanParser < BaseParser class XMLScanParser < BaseParser
private class << self
def listener def listener
XMLScanListener XMLScanListener
end
end end
private
def _parse def _parse
begin begin
XMLScan::XMLScanner.new(@listener).parse(@rss) if @rss.is_a?(String)
input = StringIO.new(@rss)
else
input = @rss
end
scanner = XMLScan::XMLScanner.new(@listener)
scanner.parse(input)
rescue XMLScan::Error => e rescue XMLScan::Error => e
raise NotWellFormedError.new(e.lineno){e.message} lineno = e.lineno || scanner.lineno || input.lineno
raise NotWellFormedError.new(lineno){e.message}
end end
end end
@ -57,7 +67,7 @@ module RSS
end end
def on_entityref(ref) def on_entityref(ref)
text(ENTITIES[ref]) text(entity(ref))
end end
def on_charref(code) def on_charref(code)
@ -79,7 +89,7 @@ module RSS
end end
def on_attr_entityref(ref) def on_attr_entityref(ref)
@current_attr << ENTITIES[ref] @current_attr << entity(ref)
end end
def on_attr_charref(code) def on_attr_charref(code)
@ -97,6 +107,15 @@ module RSS
tag_end(name) tag_end(name)
end end
private
def entity(ref)
ent = ENTITIES[ref]
if ent
ent
else
wellformed_error("undefined entity: #{ref}")
end
end
end end
end end

View File

@ -122,7 +122,20 @@ EOR
end end
assert_parse(rss, :nothing_raised) assert_parse(rss, :nothing_raised)
end end
end
def test_undefined_entity
return unless RSS::Parser.default_parser.raise_for_undefined_entity?
assert_parse(make_RDF(<<-EOR), :raises, RSS::NotWellFormedError)
#{make_channel}
#{make_image}
<item rdf:about="#{RDF_ABOUT}">
<title>#{TITLE_VALUE} &UNKNOWN_ENTITY;</title>
<link>#{LINK_VALUE}</link>
<description>#{DESCRIPTION_VALUE}</description>
</item>
#{make_textinput}
EOR
end end
def test_channel def test_channel