Disable strict XML parsing
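Setting Decoder.Strict to false makes the XML decoder tolerate common mistakes such as an unescaped "&" in a URL, which should improve parsing of broken XML feeds. See https://golang.org/pkg/encoding/xml/#Decoder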
This commit is contained in:
parent
ca48f7612a
commit
36d7732234
8 changed files with 95 additions and 0 deletions
|
@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
|||
atomFeed := new(atomFeed)
|
||||
decoder := xml.NewDecoder(data)
|
||||
decoder.Entity = xml.HTMLEntity
|
||||
decoder.Strict = false
|
||||
decoder.CharsetReader = encoding.CharsetReader
|
||||
|
||||
err := decoder.Decode(atomFeed)
|
||||
|
|
|
@ -577,3 +577,22 @@ func TestParseWithHTMLEntity(t *testing.T) {
|
|||
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWithInvalidCharacterEntity(t *testing.T) {
|
||||
data := `
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/a&b"/>
|
||||
</feed>
|
||||
`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://example.org/a&b" {
|
||||
t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@ func Parse(data io.Reader) (SubcriptionList, *errors.LocalizedError) {
|
|||
feeds := new(opml)
|
||||
decoder := xml.NewDecoder(data)
|
||||
decoder.Entity = xml.HTMLEntity
|
||||
decoder.Strict = false
|
||||
decoder.CharsetReader = encoding.CharsetReader
|
||||
|
||||
err := decoder.Decode(feeds)
|
||||
|
|
|
@ -193,6 +193,40 @@ func TestParseOpmlVersion1WithoutOuterOutline(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseOpmlWithInvalidCharacterEntity(t *testing.T) {
|
||||
data := `<?xml version="1.0"?>
|
||||
<opml version="1.0">
|
||||
<head>
|
||||
<title>mySubscriptions.opml</title>
|
||||
</head>
|
||||
<body>
|
||||
<outline title="Feed 1">
|
||||
<outline type="rss" title="Feed 1" xmlUrl="http://example.org/feed1/a&b" htmlUrl="http://example.org/c&d"></outline>
|
||||
</outline>
|
||||
</body>
|
||||
</opml>
|
||||
`
|
||||
|
||||
var expected SubcriptionList
|
||||
expected = append(expected, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed1/a&b", SiteURL: "http://example.org/c&d", CategoryName: ""})
|
||||
|
||||
subscriptions, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Errorf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 1)
|
||||
}
|
||||
|
||||
for i := 0; i < len(subscriptions); i++ {
|
||||
if !subscriptions[i].Equals(expected[i]) {
|
||||
t.Errorf(`Subscription are different: "%v" vs "%v"`, subscriptions[i], expected[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidXML(t *testing.T) {
|
||||
data := `garbage`
|
||||
_, err := Parse(bytes.NewBufferString(data))
|
||||
|
|
|
@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
|||
feed := new(rdfFeed)
|
||||
decoder := xml.NewDecoder(data)
|
||||
decoder.Entity = xml.HTMLEntity
|
||||
decoder.Strict = false
|
||||
decoder.CharsetReader = encoding.CharsetReader
|
||||
|
||||
err := decoder.Decode(feed)
|
||||
|
|
|
@ -403,3 +403,22 @@ func TestParseFeedWithHTMLEntity(t *testing.T) {
|
|||
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithInvalidCharacterEntity(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
||||
<channel>
|
||||
<title>Example Feed</title>
|
||||
<link>http://example.org/a&b</link>
|
||||
</channel>
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://example.org/a&b" {
|
||||
t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
|||
feed := new(rssFeed)
|
||||
decoder := xml.NewDecoder(data)
|
||||
decoder.Entity = xml.HTMLEntity
|
||||
decoder.Strict = false
|
||||
decoder.CharsetReader = encoding.CharsetReader
|
||||
|
||||
err := decoder.Decode(feed)
|
||||
|
|
|
@ -633,3 +633,22 @@ func TestParseWithHTMLEntity(t *testing.T) {
|
|||
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWithInvalidCharacterEntity(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
|
||||
<channel>
|
||||
<link>https://example.org/a&b</link>
|
||||
<title>Example Feed</title>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "https://example.org/a&b" {
|
||||
t.Errorf(`Incorrect url, got: %q`, feed.SiteURL)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue