2004-01-31 11:10:53 -05:00
|
|
|
require "forwardable"
|
2005-11-24 22:54:29 -05:00
|
|
|
begin
|
|
|
|
require "open-uri"
|
|
|
|
rescue LoadError
|
|
|
|
require "uri"
|
|
|
|
end
|
|
|
|
# Provide Kernel#URI only when the loaded uri library has not already
# defined it.
#
# The guard uses respond_to? rather than `Kernel.methods.include?("URI")`:
# Module#methods returns Strings on Ruby 1.8 but Symbols on later versions,
# so the String comparison never matched there and the library's own
# Kernel#URI was always replaced by the simpler fallback below.
unless Kernel.respond_to?(:URI, true)
  module Kernel
    # Parse +uri_str+ and return the resulting URI object.
    # Raises whatever URI.parse raises for input it cannot parse.
    def URI(uri_str) # :doc:
      URI.parse(uri_str)
    end
    # Make URI() callable both as Kernel.URI and as a private instance method.
    module_function :URI
  end
end
|
2004-01-31 11:10:53 -05:00
|
|
|
|
2004-01-27 22:46:13 -05:00
|
|
|
require "rss/rss"
|
|
|
|
|
|
|
|
module RSS
|
|
|
|
|
2004-10-16 00:51:15 -04:00
|
|
|
# Raised when the input is not well formed XML.
class NotWellFormedError < Error
  # Line number and element name given to #initialize (either may be nil).
  attr_reader :line, :element

  # Build the error for a problem at +line+ inside +element+.
  # Both arguments are optional; whichever is given is mentioned in the
  # message.  If a block is given, its return value is appended to the
  # message on a new line.
  def initialize(line=nil, element=nil)
    # Keep the location so the attr_readers above actually report it.
    @line = line
    @element = element

    message = "This is not well formed XML"
    if element or line
      message << "\nerror occurred"
      message << " in #{element}" if element
      message << " at about #{line} line" if line
    end
    message << "\n#{yield}" if block_given?
    super(message)
  end
end
|
|
|
|
|
|
|
|
# Raised when none of the libraries listed in AVAILABLE_PARSER_LIBRARIES
# could be used.
class XMLParserNotFound < Error
  # The message lists the inspected AVAILABLE_PARSER_LIBRARIES table.
  def initialize
    libraries = AVAILABLE_PARSER_LIBRARIES.inspect
    super("available XML parser does not found in #{libraries}.")
  end
end
|
|
|
|
|
|
|
|
# Raised when a parser that is not in AVAILABLE_PARSERS is selected.
class NotValidXMLParser < Error
  # +parser+ is the rejected value; the message names it together with the
  # inspected AVAILABLE_PARSERS list.
  def initialize(parser)
    rejected = "#{parser} is not available XML parser. "
    accepted = "available XML parser is #{AVAILABLE_PARSERS.inspect}."
    super(rejected + accepted)
  end
end
|
|
|
|
|
|
|
|
# Raised when the prefix used on a tag is not bound to the namespace URI
# the tag requires.
class NSError < InvalidRSSError
  # The offending tag name, the prefix it used and the URI that was required.
  attr_reader :tag, :prefix, :uri

  # Remember +tag+, +prefix+ and +require_uri+ and build the message from them.
  def initialize(tag, prefix, require_uri)
    @tag = tag
    @prefix = prefix
    @uri = require_uri
    super("prefix <#{prefix}> doesn't associate uri <#{require_uri}> in tag <#{tag}>")
  end
end
|
|
|
|
|
|
|
|
# Front end that turns its input into XML text and hands it to one of the
# concrete parser classes, forwarding the parsing API to that instance.
class Parser
  extend Forwardable

  class << self
    # Parser class chosen through default_parser=; nil means "use the first
    # entry of AVAILABLE_PARSERS".
    @@default_parser = nil

    # Return the explicitly selected parser class, or the first available one.
    def default_parser
      @@default_parser || AVAILABLE_PARSERS.first
    end

    # Select +new_value+ as the default parser class.
    # Raises NotValidXMLParser unless it is listed in AVAILABLE_PARSERS.
    def default_parser=(new_value)
      unless AVAILABLE_PARSERS.include?(new_value)
        raise NotValidXMLParser.new(new_value)
      end
      @@default_parser = new_value
    end

    # Convenience entry point: build a parser for +rss+ with +parser_class+,
    # apply the two flags and return the result of its #parse.
    def parse(rss, do_validate=true, ignore_unknown_element=true, parser_class=default_parser)
      parser = new(rss, parser_class)
      parser.do_validate = do_validate
      parser.ignore_unknown_element = ignore_unknown_element
      parser.parse
    end
  end

  def_delegators(:@parser, :parse, :rss,
                 :ignore_unknown_element,
                 :ignore_unknown_element=, :do_validate,
                 :do_validate=)

  # +rss+ may be XML text, a URI (object or string) or a file path; see
  # normalize_rss.  +parser_class+ is instantiated with the normalized source.
  def initialize(rss, parser_class=self.class.default_parser)
    @parser = parser_class.new(normalize_rss(rss))
  end

  private

  # Turn +rss+ into the source handed to the concrete parser:
  # * anything that looks like XML is returned untouched;
  # * a URI that responds to #read is read;
  # * an untainted, readable file path is read from disk;
  # * everything else is returned as is.
  #
  # NOTE(review): the argument decides whether a URL is read (when the URI
  # object responds to #read, e.g. with open-uri loaded) or a local file is
  # opened.  Callers passing untrusted strings should restrict them first.
  def normalize_rss(rss)
    return rss if maybe_xml?(rss)

    uri = to_uri(rss)

    if uri.respond_to?(:read)
      uri.read
    elsif !tainted_source?(rss) and File.readable?(rss)
      File.open(rss) {|f| f.read}
    else
      rss
    end
  end

  # True when +source+ is marked tainted.  The taint API does not exist on
  # every Ruby version, so a missing #tainted? counts as "not tainted"
  # instead of raising NoMethodError.
  def tainted_source?(source)
    source.respond_to?(:tainted?) and source.tainted?
  end

  # Truthy when +source+ is a String containing "<", i.e. probably XML text
  # rather than a location.
  def maybe_xml?(source)
    source.is_a?(String) and /</ =~ source
  end

  # Return +rss+ as a URI object when possible, otherwise +rss+ itself.
  def to_uri(rss)
    return rss if rss.is_a?(::URI::Generic)

    begin
      URI(rss)
    rescue ::URI::Error
      # Not a parsable URI: let the caller treat it as a path or raw text.
      rss
    end
  end
end
|
|
|
|
|
|
|
|
# Common behaviour of the concrete parsers.  The class method +listener+
# and the instance method +_parse+ are called here but defined elsewhere
# (presumably by each subclass).
class BaseParser
  # Ask the listener class whether an undefined entity should raise.
  def self.raise_for_undefined_entity?
    listener.raise_for_undefined_entity?
  end

  # Create a fresh listener and keep +rss+ as the source to parse.
  def initialize(rss)
    @listener = self.class.listener.new
    @rss = rss
  end

  # The object held by the listener (nil until something has been parsed).
  def rss
    @listener.rss
  end

  # Read the listener's ignore_unknown_element flag.
  def ignore_unknown_element
    @listener.ignore_unknown_element
  end

  # Set the listener's ignore_unknown_element flag.
  def ignore_unknown_element=(new_value)
    @listener.ignore_unknown_element = new_value
  end

  # Read the listener's do_validate flag.
  def do_validate
    @listener.do_validate
  end

  # Set the listener's do_validate flag.
  def do_validate=(new_value)
    @listener.do_validate = new_value
  end

  # Run the actual parse once (only while the listener has no result yet)
  # and return the listener's result.
  def parse
    _parse if @listener.rss.nil?
    @listener.rss
  end
end
|
|
|
|
|
|
|
|
# Class-level registry used by the listeners: which setter handles a tag
# of a namespace URI, which URIs a text element is registered for, and
# which class name a tag maps to.
class BaseListener
  extend Utils

  class << self
    # Class variables, so every class in the BaseListener hierarchy sees the
    # same tables.
    @@setters = {}          # uri  => { tag_name => setter }
    @@registered_uris = {}  # name => { uri => nil }
    @@class_names = {}      # uri  => { tag_name => class_name }

    # Remember +setter+ as the setter for +tag_name+ in namespace +uri+.
    def install_setter(uri, tag_name, setter)
      @@setters[uri] ||= {}
      @@setters[uri][tag_name] = setter
    end

    # Return the setter installed for +tag_name+ in +uri+, or nil.
    def setter(uri, tag_name)
      (@@setters[uri] || {})[tag_name]
    end

    # Return the tag names that have a setter installed for +uri+
    # (an empty array when nothing is installed).
    def available_tags(uri)
      (@@setters[uri] || {}).keys
    end

    # Record that the text element +name+ is valid in namespace +uri+.
    def register_uri(uri, name)
      @@registered_uris[name] ||= {}
      @@registered_uris[name][uri] = nil
    end

    # True when +uri+ was registered for +name+.  A name that was never
    # registered yields false instead of raising NoMethodError.
    def uri_registered?(uri, name)
      (@@registered_uris[name] || {}).has_key?(uri)
    end

    # Map +tag_name+ in namespace +uri+ to the class name +class_name+.
    def install_class_name(uri, tag_name, class_name)
      @@class_names[uri] ||= {}
      @@class_names[uri][tag_name] = class_name
    end

    # Return the class name installed for +tag_name+ in +uri+.  When no
    # name is installed -- including the case where the URI has other tags
    # installed but not this one -- fall back to +tag_name+ with its first
    # character upcased.
    def class_name(uri, tag_name)
      (@@class_names[uri] || {})[tag_name] ||
        tag_name[0,1].upcase + tag_name[1..-1]
    end

    # Install +setter+ for the text element +name+ of +uri+ and make sure a
    # start_<name> handler exists.  The file/line pair obtained from
    # get_file_and_line_from_caller (from Utils, not visible here) is passed
    # to module_eval as the location of the generated code.
    def install_get_text_element(uri, name, setter)
      install_setter(uri, name, setter)
      def_get_text_element(uri, name, *get_file_and_line_from_caller(1))
    end

    # Default answer for parsers asking whether an undefined entity raises.
    def raise_for_undefined_entity?
      true
    end

    private

    # Register +uri+ for +name+ and, unless this class already defines it,
    # generate a private start_<name> method.  The generated method starts
    # collecting text when the element's namespace is registered for the
    # name (raising UnknownTagError under validation when no setter is
    # installed for it) and otherwise falls back to start_else_element.
    def def_get_text_element(uri, name, file, line)
      register_uri(uri, name)
      method_name = "start_#{name}"
      # private_instance_methods returns Strings or Symbols depending on the
      # Ruby version; compare as Strings so the check works on both.
      own_private_methods = private_instance_methods(false).map {|m| m.to_s}
      unless own_private_methods.include?(method_name)
        module_eval(<<-EOT, file, line)
          def start_#{name}(name, prefix, attrs, ns)
            uri = ns[prefix]
            if self.class.uri_registered?(uri, #{name.inspect})
              if @do_validate
                tags = self.class.available_tags(uri)
                unless tags.include?(name)
                  raise UnknownTagError.new(name, uri)
                end
              end
              start_get_text_element(name, prefix, ns, uri)
            else
              start_else_element(name, prefix, attrs, ns)
            end
          end
        EOT
        private(method_name)
      end
    end
  end
end
|
|
|
|
|
|
|
|
# Event handlers shared by the listeners.  The XML callbacks (xmldecl,
# instruction, tag_start, tag_end, text) keep parallel stacks of namespace
# bindings, child tags, collected text and end-of-element procs, and build
# the element objects as the tags open and close.
module ListenerMixin
  # Result object; set to nil by #initialize and never assigned in this module.
  attr_reader :rss

  # When true, elements with no matching class are skipped instead of raising.
  attr_accessor :ignore_unknown_element
  # When true, namespaces, required attributes and children are validated.
  attr_accessor :do_validate

  def initialize
    @rss = nil
    @ignore_unknown_element = true
    @do_validate = true
    @ns_stack = [{}]     # prefix => uri bindings, one hash per open element
    @tag_stack = [[]]    # [uri, local_name] of the children seen per open element
    @text_stack = ['']   # character data collected per open element
    @proc_stack = []     # proc (or nil) to run when each open element closes
    @last_element = nil
    @version = @encoding = @standalone = nil
    @xml_stylesheets = []
  end

  # Remember the values of the XML declaration.
  def xmldecl(version, encoding, standalone)
    @version, @encoding, @standalone = version, encoding, standalone
  end

  # Handle a processing instruction: an "xml-stylesheet" instruction that
  # has an href pseudo-attribute is recorded as an XMLStyleSheet.
  def instruction(name, content)
    if name == "xml-stylesheet"
      params = parse_pi_content(content)
      if params.has_key?("href")
        @xml_stylesheets << XMLStyleSheet.new(*params)
      end
    end
  end

  # Handle an opening tag: split the xmlns declarations from the ordinary
  # attributes, push the new state onto the stacks and dispatch to
  # start_<local name> when such a method exists, else to start_else_element.
  def tag_start(name, attributes)
    @text_stack.push('')

    ns = @ns_stack.last.dup
    attrs = {}
    attributes.each do |n, v|
      if /\Axmlns(?:\z|:)/ =~ n
        # What follows "xmlns" / "xmlns:" is the declared prefix ("" for the
        # default namespace).  Read it from the match itself so this does
        # not rely on the English library having been loaded.
        ns[Regexp.last_match.post_match] = v
      else
        attrs[n] = v
      end
    end
    @ns_stack.push(ns)

    prefix, local = split_name(name)
    @tag_stack.last.push([ns[prefix], local])
    @tag_stack.push([])
    if respond_to?("start_#{local}", true)
      __send__("start_#{local}", local, prefix, attrs, ns.dup)
    else
      start_else_element(local, prefix, attrs, ns.dup)
    end
  end

  # Handle a closing tag: pop the state pushed by tag_start and run the
  # proc registered for the element, if any, with its text and child tags.
  def tag_end(name)
    if DEBUG
      p "end tag #{name}"
      p @tag_stack
    end
    text = @text_stack.pop
    tags = @tag_stack.pop
    pr = @proc_stack.pop
    pr.call(text, tags) unless pr.nil?
    @ns_stack.pop
  end

  # Append character data to the text of the innermost open element.
  def text(data)
    @text_stack.last << data
  end

  private

  CONTENT_PATTERN = /\s*([^=]+)=(["'])([^\2]+?)\2/
  # Parse the name="value" pairs of a processing instruction into a Hash.
  def parse_pi_content(content)
    params = {}
    content.scan(CONTENT_PATTERN) do |name, quote, value|
      params[name] = value
    end
    params
  end

  # Start an element that has no dedicated start_<name> handler.  If the
  # class of the current element has a constant named after the tag, an
  # instance of it is started; otherwise the element is ignored or
  # NotExceptedTagError is raised, depending on @ignore_unknown_element.
  def start_else_element(local, prefix, attrs, ns)
    class_name = self.class.class_name(ns[prefix], local)
    current_class = @last_element.class
    # Module#constants returns Strings or Symbols depending on the Ruby
    # version; compare as Strings so the lookup works on both.
    if current_class.constants.any? {|c| c.to_s == class_name}
      next_class = current_class.const_get(class_name)
      start_have_something_element(local, prefix, attrs, ns, next_class)
    else
      if @ignore_unknown_element
        @proc_stack.push(nil)
      else
        parent = "ROOT ELEMENT???"
        # With no current element the class is NilClass, which has no
        # tag_name; keep the placeholder in that case.
        if current_class.respond_to?(:tag_name) and current_class.tag_name
          parent = current_class.tag_name
        end
        raise NotExceptedTagError.new(local, parent)
      end
    end
  end

  NAMESPLIT = /^(?:([\w:][-\w\d.]*):)?([\w:][-\w\d.]*)/
  # Split a qualified name into [prefix, local name]; the prefix is ''
  # when the name has none.
  def split_name(name)
    name =~ NAMESPLIT
    [$1 || '', $2]
  end

  # Under validation, raise NSError unless +prefix+ is bound to +require_uri+.
  def check_ns(tag_name, prefix, ns, require_uri)
    return unless @do_validate
    unless ns[prefix] == require_uri
      raise NSError.new(tag_name, prefix, require_uri)
    end
  end

  # Register an end-of-element proc that passes the collected text to the
  # current element through the setter installed for the tag (or
  # "<tag_name>=" when none is installed).
  def start_get_text_element(tag_name, prefix, ns, required_uri)
    @proc_stack.push Proc.new {|text, tags|
      setter = self.class.setter(required_uri, tag_name)
      setter ||= "#{tag_name}="
      if @last_element.respond_to?(setter)
        @last_element.__send__(setter, text.to_s)
      else
        if @do_validate and not @ignore_unknown_element
          raise NotExceptedTagError.new(tag_name, @last_element.tag_name)
        end
      end
    }
  end

  # Start an element backed by +klass+: check its namespace, collect the
  # constructor arguments from the attributes +klass+ declares, create the
  # element, attach it to the current element and make it current until
  # its closing tag.
  def start_have_something_element(tag_name, prefix, attrs, ns, klass)
    check_ns(tag_name, prefix, ns, klass.required_uri)

    args = []

    klass.get_attributes.each do |a_name, a_uri, required|
      # Normalize the accepted namespace(s) to a list; nil stands for
      # "attribute without a prefix".
      if a_uri.is_a?(String) or !a_uri.respond_to?(:include?)
        a_uri = [a_uri]
      end

      # Look for a prefixed attribute first.  The loop variables are named
      # so that they do not overwrite the +prefix+ parameter.
      val = nil
      unless a_uri == [nil]
        ns.each do |ns_prefix, ns_uri|
          next unless a_uri.include?(ns_uri)
          val = attrs["#{ns_prefix}:#{a_name}"]
          break if val
        end
      end
      if val.nil? and a_uri.include?(nil)
        val = attrs[a_name]
      end

      if @do_validate and required and val.nil?
        unless a_uri.include?(nil)
          # Report the attribute with one matching prefix; stopping at the
          # first match keeps several bound prefixes from being stacked
          # onto the name.
          ns.each do |ns_prefix, ns_uri|
            if a_uri.include?(ns_uri)
              a_name = "#{ns_prefix}:#{a_name}"
              break
            end
          end
        end
        raise MissingAttributeError.new(tag_name, a_name)
      end

      args << val
    end

    previous = @last_element
    next_element = klass.new(*args)
    next_element.do_validate = @do_validate
    # __send__ (as used elsewhere in this file) also reaches a non-public
    # set_next_element.
    previous.__send__(:set_next_element, tag_name, next_element)
    @last_element = next_element
    @proc_stack.push Proc.new { |text, tags|
      p(@last_element.class) if DEBUG
      @last_element.content = text if klass.have_content?
      @last_element.validate_for_stream(tags) if @do_validate
      @last_element = previous
    }
  end
end
|
|
|
|
|
|
|
|
# Candidate parser back ends as [library to require, constant name] pairs.
# The table is only set when the constant is not already defined.
AVAILABLE_PARSER_LIBRARIES = [
  ["rss/xmlparser", :XMLParserParser],
  ["rss/xmlscanner", :XMLScanParser],
  ["rss/rexmlparser", :REXMLParser],
] unless const_defined?(:AVAILABLE_PARSER_LIBRARIES)

# Parser classes whose library could actually be loaded, in table order.
AVAILABLE_PARSERS = []

AVAILABLE_PARSER_LIBRARIES.each do |library, parser_name|
  begin
    require library
    AVAILABLE_PARSERS << const_get(parser_name)
  rescue LoadError
    # This back end cannot be loaded; try the next one.
  end
end

# Without any usable back end nothing can be parsed.
raise XMLParserNotFound if AVAILABLE_PARSERS.empty?
|
2004-01-27 22:46:13 -05:00
|
|
|
end
|