mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
93402a2f17
* lib/rss/rexmlparser.rb: ditto. * lib/rss/xmlparser.rb: ditto. * lib/rss/xmlscanner.rb: ditto. * lib/rss/xmlscanner.rb: more robust entity handling. * test/rss/test_parser.rb: added an entity handling test. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9602 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
93 lines
1.6 KiB
Ruby
93 lines
1.6 KiB
Ruby
begin
|
|
require "xml/parser"
|
|
rescue LoadError
|
|
require "xmlparser"
|
|
end
|
|
|
|
begin
|
|
require "xml/encoding-ja"
|
|
rescue LoadError
|
|
require "xmlencoding-ja"
|
|
if defined?(Kconv)
|
|
module XMLEncoding_ja
|
|
class SJISHandler
|
|
include Kconv
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
module XML
|
|
class Parser
|
|
unless defined?(Error)
|
|
Error = ::XMLParserError
|
|
end
|
|
end
|
|
end
|
|
|
|
module RSS
|
|
|
|
class REXMLLikeXMLParser < ::XML::Parser
|
|
|
|
include ::XML::Encoding_ja
|
|
|
|
def listener=(listener)
|
|
@listener = listener
|
|
end
|
|
|
|
def startElement(name, attrs)
|
|
@listener.tag_start(name, attrs)
|
|
end
|
|
|
|
def endElement(name)
|
|
@listener.tag_end(name)
|
|
end
|
|
|
|
def character(data)
|
|
@listener.text(data)
|
|
end
|
|
|
|
def xmlDecl(version, encoding, standalone)
|
|
@listener.xmldecl(version, encoding, standalone == 1)
|
|
end
|
|
|
|
def processingInstruction(target, content)
|
|
@listener.instruction(target, content)
|
|
end
|
|
|
|
end
|
|
|
|
class XMLParserParser < BaseParser
|
|
|
|
class << self
|
|
def listener
|
|
XMLParserListener
|
|
end
|
|
end
|
|
|
|
private
|
|
def _parse
|
|
begin
|
|
parser = REXMLLikeXMLParser.new
|
|
parser.listener = @listener
|
|
parser.parse(@rss)
|
|
rescue ::XML::Parser::Error => e
|
|
raise NotWellFormedError.new(parser.line){e.message}
|
|
end
|
|
end
|
|
|
|
end
|
|
|
|
class XMLParserListener < BaseListener
|
|
|
|
include ListenerMixin
|
|
|
|
def xmldecl(version, encoding, standalone)
|
|
super
|
|
# Encoding is converted to UTF-8 when XMLParser parses XML.
|
|
@encoding = 'UTF-8'
|
|
end
|
|
|
|
end
|
|
|
|
end
|