1
0
Fork 0
mirror of https://github.com/rails/rails.git synced 2022-11-09 12:12:34 -05:00

Code cleanup, bugfixes and speed improvements for the Nokogiri and LibXML XmlMini backends

[#3641 state:committed]

Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
This commit is contained in:
Willem van Bergen 2010-01-01 10:12:30 +01:00 committed by Jeremy Kemper
parent 4f590b67b7
commit 34b03cebf9
4 changed files with 100 additions and 117 deletions

View file

@ -12,7 +12,7 @@ module ActiveSupport
if !data.respond_to?(:read)
data = StringIO.new(data || '')
end
char = data.getc
if char.nil?
{}
@ -34,106 +34,42 @@ module LibXML #:nodoc:
end
module Node #:nodoc:
CONTENT_ROOT = '__content__'
LIB_XML_LIMIT = 30000000 # Hardcoded LibXML limit
CONTENT_ROOT = '__content__'.freeze
# Convert XML document to hash
#
# hash::
# Hash to merge the converted element into.
def to_hash(hash={})
if text? || cdata?
raise LibXML::XML::Error if hash[CONTENT_ROOT].to_s.length + content.length >= LIB_XML_LIMIT
hash[CONTENT_ROOT] = hash[CONTENT_ROOT].to_s + content
else
sub_hash = insert_name_into_hash(hash, name)
attributes_to_hash(sub_hash)
if array?
children_array_to_hash(sub_hash)
elsif yaml?
children_yaml_to_hash(sub_hash)
else
children_to_hash(sub_hash)
node_hash = {}
# Insert node hash into parent hash correctly.
case hash[name]
when Array then hash[name] << node_hash
when Hash then hash[name] = [hash[name], node_hash]
when nil then hash[name] = node_hash
end
# Handle child elements
each_child do |c|
if c.element?
c.to_hash(node_hash)
elsif c.text? || c.cdata?
node_hash[CONTENT_ROOT] ||= ''
node_hash[CONTENT_ROOT] << c.content
end
end
# Remove content node if it is blank
if node_hash.length > 1 && node_hash[CONTENT_ROOT].blank?
node_hash.delete(CONTENT_ROOT)
end
# Handle attributes
each_attr { |a| node_hash[a.name] = a.value }
hash
end
protected
# Insert name into hash
#
# hash::
# Hash to merge the converted element into.
# name::
# name to to merge into hash
def insert_name_into_hash(hash, name)
sub_hash = {}
if hash[name]
if !hash[name].kind_of? Array
hash[name] = [hash[name]]
end
hash[name] << sub_hash
else
hash[name] = sub_hash
end
sub_hash
end
# Insert children into hash
#
# hash::
# Hash to merge the children into.
def children_to_hash(hash={})
each { |child| child.to_hash(hash) }
if hash.length > 1 && hash[CONTENT_ROOT].blank?
hash.delete(CONTENT_ROOT)
end
attributes_to_hash(hash)
hash
end
# Convert xml attributes to hash
#
# hash::
# Hash to merge the attributes into
def attributes_to_hash(hash={})
each_attr { |attr| hash[attr.name] = attr.value }
hash
end
# Convert array into hash
#
# hash::
# Hash to merge the array into
def children_array_to_hash(hash={})
hash[child.name] = map do |child|
returning({}) { |sub_hash| child.children_to_hash(sub_hash) }
end
hash
end
# Convert yaml into hash
#
# hash::
# Hash to merge the yaml into
def children_yaml_to_hash(hash = {})
hash[CONTENT_ROOT] = content unless content.blank?
hash
end
# Check if child is of type array
def array?
child? && child.next? && child.name == child.next.name
end
# Check if child is of type yaml
def yaml?
attributes.collect{|x| x.value}.include?('yaml')
end
end
end
end

View file

@ -12,13 +12,13 @@ module ActiveSupport
if !data.respond_to?(:read)
data = StringIO.new(data || '')
end
char = data.getc
if char.nil?
{}
else
data.ungetc(char)
doc = Nokogiri::XML(data) { |cfg| cfg.noblanks }
doc = Nokogiri::XML(data)
raise doc.errors.first if doc.errors.length > 0
doc.to_hash
end
@ -32,40 +32,42 @@ module ActiveSupport
end
module Node #:nodoc:
CONTENT_ROOT = '__content__'
CONTENT_ROOT = '__content__'.freeze
# Convert XML document to hash
#
# hash::
# Hash to merge the converted element into.
def to_hash(hash = {})
attributes = attributes_as_hash
if hash[name]
hash[name] = [hash[name]].flatten
hash[name] << attributes
else
hash[name] ||= attributes
def to_hash(hash={})
node_hash = {}
# Insert node hash into parent hash correctly.
case hash[name]
when Array then hash[name] << node_hash
when Hash then hash[name] = [hash[name], node_hash]
when nil then hash[name] = node_hash
end
children.each { |child|
next if child.blank? && 'file' != self['type']
if child.text? || child.cdata?
(attributes[CONTENT_ROOT] ||= '') << child.content
next
# Handle child elements
children.each do |c|
if c.element?
c.to_hash(node_hash)
elsif c.text? || c.cdata?
node_hash[CONTENT_ROOT] ||= ''
node_hash[CONTENT_ROOT] << c.content
end
end
child.to_hash attributes
}
# Remove content node if it is blank and there are child tags
if node_hash.length > 1 && node_hash[CONTENT_ROOT].blank?
node_hash.delete(CONTENT_ROOT)
end
# Handle attributes
attribute_nodes.each { |a| node_hash[a.node_name] = a.value }
hash
end
def attributes_as_hash
Hash[*(attribute_nodes.map { |node|
[node.node_name, node.value]
}.flatten)]
end
end
end

View file

@ -184,6 +184,15 @@ class LibxmlEngineTest < Test::Unit::TestCase
eoxml
end
def test_children_with_blank_text_and_attribute
assert_equal_rexml(<<-eoxml)
<root>
<products type="file"> </products>
</root>
eoxml
end
private
def assert_equal_rexml(xml)
hash = XmlMini.with_backend('REXML') { XmlMini.parse(xml) }

View file

@ -159,17 +159,53 @@ class NokogiriEngineTest < Test::Unit::TestCase
XmlMini.parse(io)
end
def test_children_with_cdata
def test_children_with_simple_cdata
assert_equal_rexml(<<-eoxml)
<root>
<products>
hello <![CDATA[everyone]]>
<![CDATA[cdatablock]]>
</products>
</root>
eoxml
end
def test_children_with_multiple_cdata
assert_equal_rexml(<<-eoxml)
<root>
<products>
<![CDATA[cdatablock1]]><![CDATA[cdatablock2]]>
</products>
</root>
eoxml
end
def test_children_with_text_and_cdata
assert_equal_rexml(<<-eoxml)
<root>
<products>
hello <![CDATA[cdatablock]]>
morning
</products>
</root>
eoxml
end
def test_children_with_blank_text
assert_equal_rexml(<<-eoxml)
<root>
<products> </products>
</root>
eoxml
end
def test_children_with_blank_text_and_attribute
assert_equal_rexml(<<-eoxml)
<root>
<products type="file"> </products>
</root>
eoxml
end
private
def assert_equal_rexml(xml)
hash = XmlMini.with_backend('REXML') { XmlMini.parse(xml) }