Handle Atom feeds with whitespace around CDATA
Trim whitespace around CDATA sections before extracting the CharData. This problem was discovered when reading https://www.sethvargo.com/feed.xml, where the Title and Summary fields have newlines and spaces between the <title> element and the CDATA section, e.g. <title> <![CDATA[Entry title here]]> </title>. Because the content did not start with the CDATA marker, the title was coming into Miniflux as the raw markup <![CDATA[Entry title here]]> instead of the text inside it.
This commit is contained in:
parent
7b0a4a7803
commit
cc3e65dd3c
2 changed files with 15 additions and 2 deletions
|
@ -229,10 +229,9 @@ type atom10Text struct {
|
|||
|
||||
func (a *atom10Text) String() string {
|
||||
var content string
|
||||
|
||||
switch {
|
||||
case a.Type == "", a.Type == "text", a.Type == "text/plain":
|
||||
if strings.HasPrefix(a.InnerXML, `<![CDATA[`) {
|
||||
if strings.HasPrefix(strings.TrimSpace(a.InnerXML), `<![CDATA[`) {
|
||||
content = html.EscapeString(a.CharData)
|
||||
} else {
|
||||
content = a.InnerXML
|
||||
|
|
|
@ -303,6 +303,16 @@ func TestParseEntryWithHTMLTitle(t *testing.T) {
|
|||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<title>
|
||||
<![CDATA[Entry title with space around CDATA]]>
|
||||
</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
|
@ -317,6 +327,10 @@ func TestParseEntryWithHTMLTitle(t *testing.T) {
|
|||
if feed.Entries[1].Title != "Test “Test”" {
|
||||
t.Errorf("Incorrect entry title, got: %q", feed.Entries[1].Title)
|
||||
}
|
||||
|
||||
if feed.Entries[2].Title != "Entry title with space around CDATA" {
|
||||
t.Errorf("Incorrect entry title, got: %q", feed.Entries[2].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithXHTMLTitle(t *testing.T) {
|
||||
|
|
Loading…
Add table
Reference in a new issue