2008-01-05 08:32:06 -05:00
|
|
|
require 'abstract_unit'
|
2007-02-04 15:04:40 -05:00
|
|
|
|
|
|
|
# Tests for HTML::Document: building a document from a markup string,
# querying it with +find+ / +find_all+, serialising the root node with
# +to_s+, and the difference between the default and the strict parse
# (second constructor argument +true+).
class DocumentTest < Test::Unit::TestCase
  # A leading doctype declaration is kept as the first child of the root,
  # followed by a whitespace node and then the <html> tag.
  def test_handle_doctype
    doc = nil
    assert_nothing_raised do
      doc = HTML::Document.new <<-HTML.strip
        <!DOCTYPE "blah" "blah" "blah">
        <html>
        </html>
      HTML
    end
    assert_equal 3, doc.root.children.length
    assert_equal %{<!DOCTYPE "blah" "blah" "blah">}, doc.root.children[0].content
    assert_match %r{\s+}m, doc.root.children[1].content
    assert_equal "html", doc.root.children[2].name
  end

  # +find+ locates a nested tag by tag name plus attribute value.
  def test_find_img
    doc = HTML::Document.new <<-HTML.strip
      <html>
        <body>
          <p><img src="hello.gif"></p>
        </body>
      </html>
    HTML
    assert doc.find(:tag=>"img", :attributes=>{"src"=>"hello.gif"})
  end

  # +find_all+ returns every node carrying class="test", in document order.
  def test_find_all
    doc = HTML::Document.new <<-HTML.strip
      <html>
        <body>
          <p class="test"><img src="hello.gif"></p>
          <div class="foo">
            <p class="test">something</p>
            <p>here is <em class="test">more</em></p>
          </div>
        </body>
      </html>
    HTML
    all = doc.find_all :attributes => { :class => "test" }
    assert_equal 3, all.length
    assert_equal [ "p", "p", "em" ], all.map { |n| n.name }
  end

  # Text can be matched on its own, as a :child condition (hash or bare
  # string), or as the :content of a tag.
  def test_find_with_text
    doc = HTML::Document.new <<-HTML.strip
      <html>
        <body>
          <p>Some text</p>
        </body>
      </html>
    HTML
    assert doc.find(:content => "Some text")
    assert doc.find(:tag => "p", :child => { :content => "Some text" })
    assert doc.find(:tag => "p", :child => "Some text")
    assert doc.find(:tag => "p", :content => "Some text")
  end

  # With the second and third constructor arguments both true, a
  # self-closing tag and a <link> element with a body parse without raising.
  def test_parse_xml
    assert_nothing_raised { HTML::Document.new("<tags><tag/></tags>", true, true) }
    assert_nothing_raised { HTML::Document.new("<outer><link>something</link></outer>", true, true) }
  end

  # The <div> is found with a :children condition of exactly one <table>.
  def test_parse_document
    doc = HTML::Document.new(<<-HTML)
      <div>
        <h2>blah</h2>
        <table>
        </table>
      </div>
    HTML
    assert_not_nil doc.find(:tag => "div", :children => { :count => 1, :only => { :tag => "table" } })
  end

  # The four tests below check that +root.to_s+ reproduces the input markup.
  def test_tag_nesting_nothing_to_s
    doc = HTML::Document.new("<tag></tag>")
    assert_equal "<tag></tag>", doc.root.to_s
  end

  def test_tag_nesting_space_to_s
    doc = HTML::Document.new("<tag> </tag>")
    assert_equal "<tag> </tag>", doc.root.to_s
  end

  def test_tag_nesting_text_to_s
    doc = HTML::Document.new("<tag>text</tag>")
    assert_equal "<tag>text</tag>", doc.root.to_s
  end

  def test_tag_nesting_tag_to_s
    doc = HTML::Document.new("<tag><nested /></tag>")
    assert_equal "<tag><nested /></tag>", doc.root.to_s
  end

  # Markup inside a CDATA section is not found as a descendant tag of
  # <title>; it is matched by the :child => "<br>" condition instead.
  def test_parse_cdata
    doc = HTML::Document.new(<<-HTML)
      <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
      <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
        <head>
          <title><![CDATA[<br>]]></title>
        </head>
        <body>
          <p>this document has <br> for a title</p>
        </body>
      </html>
    HTML

    assert_nil doc.find(:tag => "title", :descendant => { :tag => "br" })
    assert doc.find(:tag => "title", :child => "<br>")
  end

  # An empty tag is rejected by a :content pattern that needs a character,
  # and accepted by empty patterns, the empty string and nil.
  def test_find_empty_tag
    doc = HTML::Document.new("<div id='map'></div>")
    assert_nil doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /./)
    assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /\A\Z/)
    assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /^$/)
    assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => "")
    assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => nil)
  end

  # With only the markup argument given, malformed markup parses without
  # raising.
  def test_parse_invalid_document
    assert_nothing_raised do
      HTML::Document.new(invalid_markup)
    end
  end

  # The same malformed markup raises RuntimeError when the second
  # constructor argument is true.
  def test_invalid_document_raises_exception_when_strict
    assert_raise RuntimeError do
      HTML::Document.new(invalid_markup, true)
    end
  end

  private

  # Malformed fixture shared by the lenient and strict parsing tests so both
  # are guaranteed to see the same input. The <td> has an unterminated
  # style value, repeated attributes and a stray ';'. A fresh string is
  # returned on every call.
  def invalid_markup
    "<html>
      <table>
        <tr>
          <td style=\"color: #FFFFFF; height: 17px; onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" style=\"cursor:pointer; height: 17px;\"; nowrap onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" onmouseout=\"this.bgColor='#0066cc'; this.style.color='#FFFFFF'\" onmouseover=\"this.bgColor='#ffffff'; this.style.color='#0033cc'\">About Us</td>
        </tr>
      </table>
    </html>"
  end
end
|