Handle RSS entries with only a GUID permalink
This commit is contained in:
parent
cd7f01f573
commit
d947b0194b
2 changed files with 47 additions and 2 deletions
|
@ -95,6 +95,10 @@ func TestParseRss2Sample(t *testing.T) {
|
|||
if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.` {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
|
||||
if feed.Entries[1].URL != "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithoutTitle(t *testing.T) {
|
||||
|
@ -230,6 +234,34 @@ func TestParseEntryWithoutLink(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithOnlyGuidPermalink(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<guid isPermaLink="true">https://example.org/some-article.html</guid>
|
||||
</item>
|
||||
<item>
|
||||
<guid>https://example.org/another-article.html</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "https://example.org/some-article.html" {
|
||||
t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[1].URL != "https://example.org/another-article.html" {
|
||||
t.Errorf("Incorrect entry link, got: %s", feed.Entries[1].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithAtomLink(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
|
||||
|
|
|
@ -118,6 +118,12 @@ func (r rssFeed) feedAuthor() string {
|
|||
return sanitizer.StripTags(strings.TrimSpace(author))
|
||||
}
|
||||
|
||||
// rssGUID holds the decoded <guid> sub-element of an RSS <item>: its text
// content plus the optional isPermaLink attribute.
type rssGUID struct {
|
||||
// XMLName is the element name recorded by encoding/xml.
XMLName xml.Name
|
||||
// Data is the character data of the element, i.e. the GUID value itself.
Data string `xml:",chardata"`
|
||||
// IsPermaLink is the raw isPermaLink attribute value. It is kept as a
// string, so it is empty when the attribute is absent.
IsPermaLink string `xml:"isPermaLink,attr"`
|
||||
}
|
||||
|
||||
type rssLink struct {
|
||||
XMLName xml.Name
|
||||
Data string `xml:",chardata"`
|
||||
|
@ -159,7 +165,7 @@ func (enclosure *rssEnclosure) Size() int64 {
|
|||
}
|
||||
|
||||
type rssItem struct {
|
||||
GUID string `xml:"guid"`
|
||||
GUID rssGUID `xml:"guid"`
|
||||
Title []rssTitle `xml:"title"`
|
||||
Links []rssLink `xml:"link"`
|
||||
Description string `xml:"description"`
|
||||
|
@ -237,7 +243,7 @@ func (r *rssItem) entryAuthor() string {
|
|||
}
|
||||
|
||||
func (r *rssItem) entryHash() string {
|
||||
for _, value := range []string{r.GUID, r.entryURL()} {
|
||||
for _, value := range []string{r.GUID.Data, r.entryURL()} {
|
||||
if value != "" {
|
||||
return crypto.Hash(value)
|
||||
}
|
||||
|
@ -291,6 +297,13 @@ func (r *rssItem) entryURL() string {
|
|||
}
|
||||
}
|
||||
|
||||
// Specs: https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
|
||||
// isPermaLink is optional; its default value is true.
|
||||
// If its value is false, the guid may not be assumed to be a url, or a url to anything in particular.
|
||||
if r.GUID.IsPermaLink == "true" || r.GUID.IsPermaLink == "" {
|
||||
return strings.TrimSpace(r.GUID.Data)
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue