require "forwardable" require "rss/rss" module RSS class NotWellFormedError < Error attr_reader :line, :element def initialize(line=nil, element=nil) message = "This is not well formed XML" if element or line message << "\nerror occurred" message << " in #{element}" if element message << " at about #{line} line" if line end message << "\n#{yield}" if block_given? super(message) end end class XMLParserNotFound < Error def initialize super("available XML parser does not found in " << "#{AVAILABLE_PARSER_LIBRARIES.inspect}.") end end class NotValidXMLParser < Error def initialize(parser) super("#{parser} is not available XML parser. " << "available XML parser is " << "#{AVAILABLE_PARSERS.inspect}.") end end class NSError < InvalidRSSError attr_reader :tag, :prefix, :uri def initialize(tag, prefix, require_uri) @tag, @prefix, @uri = tag, prefix, require_uri super("prefix <#{prefix}> doesn't associate uri " << "<#{require_uri}> in tag <#{tag}>") end end class Parser extend Forwardable class << self @@default_parser = nil def default_parser @@default_parser || AVAILABLE_PARSERS.first end def default_parser=(new_value) if AVAILABLE_PARSERS.include?(new_value) @@default_parser = new_value else raise NotValidXMLParser.new(new_value) end end def parse(rss, do_validate=true, ignore_unknown_element=true, parser_class=default_parser) parser = new(rss, parser_class) parser.do_validate = do_validate parser.ignore_unknown_element = ignore_unknown_element parser.parse end end def_delegators(:@parser, :parse, :rss, :ignore_unknown_element, :ignore_unknown_element=, :do_validate, :do_validate=) def initialize(rss, parser_class=self.class.default_parser) @parser = parser_class.new(rss) end end class BaseParser def initialize(rss) @listener = listener.new @rss = rss end def rss @listener.rss end def ignore_unknown_element @listener.ignore_unknown_element end def ignore_unknown_element=(new_value) @listener.ignore_unknown_element = new_value end def do_validate @listener.do_validate end def do_validate=(new_value) @listener.do_validate = new_value end def parse if @listener.rss.nil? _parse end @listener.rss end end class BaseListener extend Utils class << self @@setter = {} @@registered_uris = {} def install_setter(uri, tag_name, setter) @@setter[uri] ||= {} @@setter[uri][tag_name] = setter end def register_uri(name, uri) @@registered_uris[name] ||= {} @@registered_uris[name][uri] = nil end def uri_registered?(name, uri) @@registered_uris[name].has_key?(uri) end def setter(uri, tag_name) begin @@setter[uri][tag_name] rescue NameError nil end end def available_tags(uri) begin @@setter[uri].keys rescue NameError [] end end def install_get_text_element(name, uri, setter) install_setter(uri, name, setter) def_get_text_element(uri, name, *get_file_and_line_from_caller(1)) end private def def_get_text_element(uri, name, file, line) register_uri(name, uri) unless private_instance_methods(false).include?("start_#{name}") module_eval(<<-EOT, file, line) def start_#{name}(name, prefix, attrs, ns) uri = ns[prefix] if self.class.uri_registered?(#{name.inspect}, uri) if @do_validate tags = self.class.available_tags(uri) unless tags.include?(name) raise UnknownTagError.new(name, uri) end end start_get_text_element(name, prefix, ns, uri) else start_else_element(name, prefix, attrs, ns) end end EOT send("private", "start_#{name}") end end end end module ListenerMixin attr_reader :rss attr_accessor :ignore_unknown_element attr_accessor :do_validate def initialize @rss = nil @ignore_unknown_element = true @do_validate = true @ns_stack = [{}] @tag_stack = [[]] @text_stack = [''] @proc_stack = [] @last_element = nil @version = @encoding = @standalone = nil @xml_stylesheets = [] end def xmldecl(version, encoding, standalone) @version, @encoding, @standalone = version, encoding, standalone end def instruction(name, content) if name == "xml-stylesheet" params = parse_pi_content(content) if params.has_key?("href") @xml_stylesheets << XMLStyleSheet.new(*params) end end end def tag_start(name, attributes) @text_stack.push('') ns = @ns_stack.last.dup attrs = {} attributes.each do |n, v| if /\Axmlns(?:\z|:)/ =~ n ns[$POSTMATCH] = v else attrs[n] = v end end @ns_stack.push(ns) prefix, local = split_name(name) @tag_stack.last.push([ns[prefix], local]) @tag_stack.push([]) if respond_to?("start_#{local}", true) send("start_#{local}", local, prefix, attrs, ns.dup) else start_else_element(local, prefix, attrs, ns.dup) end end def tag_end(name) if DEBUG p "end tag #{name}" p @tag_stack end text = @text_stack.pop tags = @tag_stack.pop pr = @proc_stack.pop pr.call(text, tags) unless pr.nil? @ns_stack.pop end def text(data) @text_stack.last << data end private CONTENT_PATTERN = /\s*([^=]+)=(["'])([^\2]+?)\2/ def parse_pi_content(content) params = {} content.scan(CONTENT_PATTERN) do |name, quote, value| params[name] = value end params end def start_else_element(local, prefix, attrs, ns) class_name = local[0,1].upcase << local[1..-1] current_class = @last_element.class # begin if current_class.constants.include?(class_name) next_class = current_class.const_get(class_name) start_have_something_element(local, prefix, attrs, ns, next_class) # rescue NameError else if @ignore_unknown_element @proc_stack.push(nil) else parent = "ROOT ELEMENT???" if current_class.tag_name parent = current_class.tag_name end raise NotExceptedTagError.new(local, parent) end end end NAMESPLIT = /^(?:([\w:][-\w\d.]*):)?([\w:][-\w\d.]*)/ def split_name(name) name =~ NAMESPLIT [$1 || '', $2] end def check_ns(tag_name, prefix, ns, require_uri) if @do_validate if ns[prefix] == require_uri #ns.delete(prefix) else raise NSError.new(tag_name, prefix, require_uri) end end end def start_get_text_element(tag_name, prefix, ns, required_uri) @proc_stack.push Proc.new {|text, tags| setter = self.class.setter(required_uri, tag_name) setter ||= "#{tag_name}=" if @last_element.respond_to?(setter) @last_element.send(setter, text.to_s) else if @do_validate and not @ignore_unknown_element raise NotExceptedTagError.new(tag_name, @last_element.tag_name) end end } end def start_have_something_element(tag_name, prefix, attrs, ns, klass) check_ns(tag_name, prefix, ns, klass.required_uri) args = [] klass.get_attributes.each do |a_name, a_uri, required| if a_uri.is_a?(String) or !a_uri.respond_to?(:include?) a_uri = [a_uri] end unless a_uri == [nil] for prefix, uri in ns if a_uri.include?(uri) val = attrs["#{prefix}:#{a_name}"] break if val end end end if val.nil? and a_uri.include?(nil) val = attrs[a_name] end if @do_validate and required and val.nil? unless a_uri.include?(nil) for prefix, uri in ns if a_uri.include?(uri) a_name = "#{prefix}:#{a_name}" end end end raise MissingAttributeError.new(tag_name, a_name) end args << val end previous = @last_element next_element = klass.send(:new, *args) next_element.do_validate = @do_validate prefix = "" prefix << "#{klass.required_prefix}_" if klass.required_prefix previous.__send__(:set_next_element, prefix, tag_name, next_element) @last_element = next_element @proc_stack.push Proc.new { |text, tags| p(@last_element.class) if DEBUG @last_element.content = text if klass.have_content? @last_element.validate_for_stream(tags) if @do_validate @last_element = previous } end end unless const_defined? :AVAILABLE_PARSER_LIBRARIES AVAILABLE_PARSER_LIBRARIES = [ ["rss/xmlparser", :XMLParserParser], ["rss/xmlscanner", :XMLScanParser], ["rss/rexmlparser", :REXMLParser], ] end AVAILABLE_PARSERS = [] AVAILABLE_PARSER_LIBRARIES.each do |lib, parser| begin require lib AVAILABLE_PARSERS.push(const_get(parser)) rescue LoadError end end if AVAILABLE_PARSERS.empty? raise XMLParserNotFound end end