mirror of
https://github.com/rails/rails.git
synced 2022-11-09 12:12:34 -05:00
Added SAX-based parser for XmlMini, using Nokogiri.
Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
This commit is contained in:
parent
34b03cebf9
commit
d7f9b9fd24
2 changed files with 298 additions and 0 deletions
82
activesupport/lib/active_support/xml_mini/nokogirisax.rb
Normal file
82
activesupport/lib/active_support/xml_mini/nokogirisax.rb
Normal file
|
@ -0,0 +1,82 @@
|
|||
require 'nokogiri'
require 'stringio'
|
||||
|
||||
# = XmlMini Nokogiri implementation using a SAX-based parser
|
||||
module ActiveSupport
  # XmlMini backend that builds its result hash from Nokogiri SAX events.
  # The module extends itself, so +parse+ and +document_class+ are callable
  # directly on the module.
  module XmlMini_NokogiriSAX
    extend self

    # Class that will build the hash while the XML document
    # is being parsed using SAX events.
    #
    # Every element becomes a hash holding its attributes, its accumulated
    # text under CONTENT_KEY and its child elements keyed by element name.
    # Children that share a name are collected into an array.
    class HashBuilder < Nokogiri::XML::SAX::Document

      # Key under which an element's text and CDATA content is accumulated.
      CONTENT_KEY   = '__content__'.freeze
      # Temporary bookkeeping key; it is removed again in end_element.
      HASH_SIZE_KEY = '__hash_size__'.freeze

      # The hash built so far (the root of the result).
      attr_reader :hash

      # Returns the hash of the element currently being parsed.
      def current_hash
        @hash_stack.last
      end

      # Resets the builder state: an empty result hash that is also the
      # bottom entry of the element stack.
      def start_document
        @hash = {}
        @hash_stack = [@hash]
      end

      # Raises if any element hash besides the root is still on the stack.
      def end_document
        raise "Parse stack not empty!" if @hash_stack.size > 1
      end

      # Turns the given error message into a Nokogiri::XML::SyntaxError.
      def error(error_message)
        raise Nokogiri::XML::SyntaxError, error_message
      end

      # Opens a new element hash, registers it under +name+ in its parent
      # and makes it the current hash.
      #
      # +attrs+ may be either a flat array (<tt>[key, value, key, value]</tt>)
      # or an assoc list (<tt>[[key, value], [key, value]]</tt>). The array
      # passed in is not modified.
      def start_element(name, attrs = [])
        new_hash = { CONTENT_KEY => '' }
        merge_attributes(new_hash, attrs)
        # Remember how many keys the hash will have once this marker is
        # stored, so end_element can tell whether child keys were added.
        new_hash[HASH_SIZE_KEY] = new_hash.size + 1

        case current_hash[name]
        when Array then current_hash[name] << new_hash
        when Hash  then current_hash[name] = [current_hash[name], new_hash]
        when nil   then current_hash[name] = new_hash
        end

        @hash_stack.push(new_hash)
      end

      # Closes the current element: removes the size marker, drops the
      # content entry when it carries no information, and pops the stack.
      def end_element(name)
        # The length must be read while the marker is still present,
        # because the recorded size counted the marker itself.
        length_with_marker = current_hash.length
        recorded_size      = current_hash.delete(HASH_SIZE_KEY)
        has_children       = length_with_marker > recorded_size

        # +blank?+ is an Active Support core extension, not core Ruby;
        # this file relies on it having been loaded elsewhere.
        content = current_hash[CONTENT_KEY]
        if (has_children && content.blank?) || content == ''
          current_hash.delete(CONTENT_KEY)
        end

        @hash_stack.pop
      end

      # Appends text to the content of the current element.
      def characters(string)
        current_hash[CONTENT_KEY] << string
      end

      # CDATA sections are accumulated exactly like plain text.
      alias_method :cdata_block, :characters

      private

      # Copies the attributes in +attrs+ into +target+ without mutating
      # +attrs+. An odd trailing key in a flat array maps to nil.
      def merge_attributes(target, attrs)
        if attrs.first.is_a?(Array)
          attrs.each { |key, value| target[key] = value }
        else
          attrs.each_slice(2) { |key, value| target[key] = value }
        end
      end
    end

    # SAX document class instantiated by +parse+; defaults to HashBuilder.
    attr_accessor :document_class
    self.document_class = HashBuilder

    # Parses +data+ and returns the resulting hash.
    #
    # +data+ may be an object responding to +read+ or a string; +nil+ is
    # treated as an empty string. Empty input returns <tt>{}</tt> without
    # invoking the parser.
    def parse(data)
      data = StringIO.new(data || '') unless data.respond_to?(:read)

      # Peek at the first character to detect empty input.
      first_char = data.getc
      return {} if first_char.nil?

      data.ungetc(first_char)
      document = document_class.new
      Nokogiri::XML::SAX::Parser.new(document).parse(data)
      document.hash
    end
  end
end
|
216
activesupport/test/xml_mini/nokogirisax_engine_test.rb
Normal file
216
activesupport/test/xml_mini/nokogirisax_engine_test.rb
Normal file
|
@ -0,0 +1,216 @@
|
|||
require 'abstract_unit'
|
||||
require 'active_support/xml_mini'
|
||||
require 'active_support/core_ext/hash/conversions'
|
||||
|
||||
begin
|
||||
require 'nokogiri'
|
||||
rescue LoadError
|
||||
# Skip nokogiri tests
|
||||
else
|
||||
|
||||
# Exercises XmlMini with the NokogiriSAX backend. Most tests parse the same
# XML with the REXML backend and expect both backends to return equal hashes.
#
# NOTE(review): the class name does not mention SAX. If the test for the
# non-SAX Nokogiri backend uses the same class name, both files would reopen
# one class when loaded together -- confirm and rename if so.
class NokogiriEngineTest < Test::Unit::TestCase
  include ActiveSupport

  # Switches XmlMini to the NokogiriSAX backend, remembering the previous one.
  def setup
    @default_backend = XmlMini.backend
    XmlMini.backend = 'NokogiriSAX'
  end

  # Restores the backend that was active before the test.
  def teardown
    XmlMini.backend = @default_backend
  end

  def test_file_from_xml
    hash = Hash.from_xml(<<-eoxml)
<blog>
<logo type="file" name="logo.png" content_type="image/png">
</logo>
</blog>
    eoxml
    assert hash.has_key?('blog')
    assert hash['blog'].has_key?('logo')

    file = hash['blog']['logo']
    assert_equal 'logo.png', file.original_filename
    assert_equal 'image/png', file.content_type
  end

  def test_exception_thrown_on_expansion_attack
    assert_raise Nokogiri::XML::SyntaxError do
      attack_xml = <<-EOT
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE member [
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
<!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
<!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
<!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
<!ENTITY e "&f;&f;&f;&f;&f;&f;&f;&f;&f;&f;">
<!ENTITY f "&g;&g;&g;&g;&g;&g;&g;&g;&g;&g;">
<!ENTITY g "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
]>
<member>
&a;
</member>
      EOT
      Hash.from_xml(attack_xml)
    end
  end

  # Selecting the backend by name must resolve to the SAX implementation
  # that this file is testing.
  def test_setting_nokogirisax_as_backend
    XmlMini.backend = 'NokogiriSAX'
    assert_equal XmlMini_NokogiriSAX, XmlMini.backend
  end

  def test_blank_returns_empty_hash
    assert_equal({}, XmlMini.parse(nil))
    assert_equal({}, XmlMini.parse(''))
  end

  def test_array_type_makes_an_array
    assert_equal_rexml(<<-eoxml)
<blog>
<posts type="array">
<post>a post</post>
<post>another post</post>
</posts>
</blog>
    eoxml
  end

  def test_one_node_document_as_hash
    assert_equal_rexml(<<-eoxml)
<products/>
    eoxml
  end

  def test_one_node_with_attributes_document_as_hash
    assert_equal_rexml(<<-eoxml)
<products foo="bar"/>
    eoxml
  end

  def test_products_node_with_book_node_as_hash
    assert_equal_rexml(<<-eoxml)
<products>
<book name="awesome" id="12345" />
</products>
    eoxml
  end

  def test_products_node_with_two_book_nodes_as_hash
    assert_equal_rexml(<<-eoxml)
<products>
<book name="awesome" id="12345" />
<book name="america" id="67890" />
</products>
    eoxml
  end

  def test_single_node_with_content_as_hash
    assert_equal_rexml(<<-eoxml)
<products>
hello world
</products>
    eoxml
  end

  def test_children_with_children
    assert_equal_rexml(<<-eoxml)
<root>
<products>
<book name="america" id="67890" />
</products>
</root>
    eoxml
  end

  def test_children_with_text
    assert_equal_rexml(<<-eoxml)
<root>
<products>
hello everyone
</products>
</root>
    eoxml
  end

  def test_children_with_non_adjacent_text
    assert_equal_rexml(<<-eoxml)
<root>
good
<products>
hello everyone
</products>
morning
</root>
    eoxml
  end

  # Parsing from an IO must give the same hash that REXML builds from the
  # equivalent string.
  def test_parse_from_io
    xml = <<-eoxml
<root>
good
<products>
hello everyone
</products>
morning
</root>
    eoxml
    expected = XmlMini.with_backend('REXML') { XmlMini.parse(xml) }
    assert_equal(expected, XmlMini.parse(StringIO.new(xml)))
  end

  def test_children_with_simple_cdata
    assert_equal_rexml(<<-eoxml)
<root>
<products>
<![CDATA[cdatablock]]>
</products>
</root>
    eoxml
  end

  def test_children_with_multiple_cdata
    assert_equal_rexml(<<-eoxml)
<root>
<products>
<![CDATA[cdatablock1]]><![CDATA[cdatablock2]]>
</products>
</root>
    eoxml
  end

  def test_children_with_text_and_cdata
    assert_equal_rexml(<<-eoxml)
<root>
<products>
hello <![CDATA[cdatablock]]>
morning
</products>
</root>
    eoxml
  end

  def test_children_with_blank_text
    assert_equal_rexml(<<-eoxml)
<root>
<products> </products>
</root>
    eoxml
  end

  def test_children_with_blank_text_and_attribute
    assert_equal_rexml(<<-eoxml)
<root>
<products type="file"> </products>
</root>
    eoxml
  end

  private

  # Asserts that the backend under test parses +xml+ into the same hash
  # as the REXML backend does.
  def assert_equal_rexml(xml)
    hash = XmlMini.with_backend('REXML') { XmlMini.parse(xml) }
    assert_equal(hash, XmlMini.parse(xml))
  end
end
|
||||
|
||||
end
|
Loading…
Reference in a new issue