diff --git a/reader/atom/atom_10.go b/reader/atom/atom_10.go index 6a64ae4f..64f79214 100644 --- a/reader/atom/atom_10.go +++ b/reader/atom/atom_10.go @@ -48,7 +48,7 @@ func (a *atom10Feed) Transform(baseURL string) *model.Feed { feed.SiteURL = siteURL } - feed.Title = a.Title.String() + feed.Title = html.UnescapeString(a.Title.String()) if feed.Title == "" { feed.Title = feed.SiteURL } @@ -100,7 +100,7 @@ func (a *atom10Entry) Transform() *model.Entry { } func (a *atom10Entry) entryTitle() string { - return a.Title.String() + return html.UnescapeString(a.Title.String()) } func (a *atom10Entry) entryContent() string { @@ -221,20 +221,19 @@ func (a *atom10Entry) entryCommentsURL() string { } type atom10Text struct { - Type string `xml:"type,attr"` - Data string `xml:",chardata"` - XML string `xml:",innerxml"` + Type string `xml:"type,attr"` + CharData string `xml:",chardata"` + InnerXML string `xml:",innerxml"` } func (a *atom10Text) String() string { - content := "" + var content string - switch { - case a.Type == "xhtml": - content = a.XML - default: - content = a.Data + if a.Type == "xhtml" { + content = a.InnerXML + } else { + content = a.CharData } - return html.UnescapeString(strings.TrimSpace(content)) + return strings.TrimSpace(content) } diff --git a/reader/atom/atom_10_test.go b/reader/atom/atom_10_test.go index 9626d60f..3f7fdbc0 100644 --- a/reader/atom/atom_10_test.go +++ b/reader/atom/atom_10_test.go @@ -244,7 +244,33 @@ func TestParseEntryTitleWithWhitespaces(t *testing.T) { } } -func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) { +func TestParseEntryWithPlainTextTitle(t *testing.T) { + data := ` + + Example Feed + + + + AT&T bought by SBC! + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != `AT&T bought by SBC!` { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntryWithHTMLAndCDATATitle(t *testing.T) { data := ` Example Feed @@ -270,7 +296,7 @@ func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) { } } -func TestParseEntryTitleWithHTML(t *testing.T) { +func TestParseEntryWithHTMLTitle(t *testing.T) { data := ` Example Feed @@ -296,7 +322,7 @@ func TestParseEntryTitleWithHTML(t *testing.T) { } } -func TestParseEntryTitleWithXHTML(t *testing.T) { +func TestParseEntryWithXHTMLTitle(t *testing.T) { data := ` Example Feed @@ -322,7 +348,7 @@ func TestParseEntryTitleWithXHTML(t *testing.T) { } } -func TestParseEntryTitleWithNumericCharacterReference(t *testing.T) { +func TestParseEntryWithNumericCharacterReferenceTitle(t *testing.T) { data := ` Example Feed @@ -348,7 +374,7 @@ func TestParseEntryTitleWithNumericCharacterReference(t *testing.T) { } } -func TestParseEntryTitleWithDoubleEncodedEntities(t *testing.T) { +func TestParseEntryWithDoubleEncodedEntitiesTitle(t *testing.T) { data := ` Example Feed @@ -374,14 +400,14 @@ func TestParseEntryTitleWithDoubleEncodedEntities(t *testing.T) { } } -func TestParseEntrySummaryWithXHTML(t *testing.T) { +func TestParseEntryWithXHTMLSummary(t *testing.T) { data := ` Example Feed - <code>Test</code> Test + Example urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z @@ -400,14 +426,14 @@ func TestParseEntrySummaryWithXHTML(t *testing.T) { } } -func TestParseEntrySummaryWithHTML(t *testing.T) { +func TestParseEntryWithHTMLAndCDATASummary(t *testing.T) { data := ` Example Feed - <code>Test</code> Test + Example urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z @@ -426,14 +452,14 @@ func TestParseEntrySummaryWithHTML(t *testing.T) { } } -func TestParseEntrySummaryWithPlainText(t *testing.T) { +func TestParseEntryWithPlainTextAndCDATASummary(t *testing.T) { data := ` Example Feed - <code>Test</code> Test + Example urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z @@ -452,6 +478,112 @@ func TestParseEntrySummaryWithPlainText(t *testing.T) { } } +func TestParseEntryWithTextAndCDATAContent(t *testing.T) { + data := ` + + Example Feed + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + + + + ` + + feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Content != "AT&T bought by SBC!" { + t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + } +} + +func TestParseEntryWithTextContent(t *testing.T) { + data := ` + + Example Feed + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + AT&T bought by SBC! + + + ` + + feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Content != "AT&T bought by SBC!" { + t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + } +} + +func TestParseEntryWithHTMLContent(t *testing.T) { + data := ` + + Example Feed + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + AT&amp;T bought <b>by SBC</b>! + + + ` + + feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Content != "AT&T bought by SBC!" { + t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + } +} + +func TestParseEntryWithXHTMLContent(t *testing.T) { + data := ` + + Example Feed + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + +
AT&T bought by SBC!
+
+
+ +
` + + feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Content != `
AT&T bought by SBC!
` { + t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + } +} + func TestParseEntryWithAuthorName(t *testing.T) { data := `