From 1d6b0491a75687553fa9c37b68cd5f71aa6fee6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Mon, 29 Jun 2020 18:08:19 -0700 Subject: [PATCH] Ignore <media:title> in RSS 2.0 feeds In the vast majority of cases, the default entry title is correct. Ignoring <media:title> avoids overriding the default title if they are different. --- reader/rss/parser_test.go | 45 +++++++++++++++++++++++++++++++++++++++ reader/rss/rss.go | 27 +++++++++++++++++++++-- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go index e41eec88..16dd1c2d 100644 --- a/reader/rss/parser_test.go +++ b/reader/rss/parser_test.go @@ -136,6 +136,51 @@ func TestParseEntryWithoutTitle(t *testing.T) { } } +func TestParseEntryWithMediaTitle(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"> + <channel> + <link>https://example.org/</link> + <item> + <title>Entry Title</title> + <link>https://example.org/item</link> + <media:title>Media Title</media:title> + </item> + </channel> + </rss>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "Entry Title" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntryWithDCTitleOnly(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/"> + <channel> + <link>https://example.org/</link> + <item> + <dc:title>Entry Title</dc:title> + <link>https://example.org/item</link> + </item> + </channel> + </rss>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "Entry Title" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + func TestParseEntryWithoutLink(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> diff --git a/reader/rss/rss.go b/reader/rss/rss.go index 3619ec68..cbb1bd19 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -122,6 +122,12 @@ type rssAuthor struct { Inner string `xml:",innerxml"` } +type rssTitle struct { + XMLName xml.Name + Data string `xml:",chardata"` + Inner string `xml:",innerxml"` +} + type rssEnclosure struct { URL string `xml:"url,attr"` Type string `xml:"type,attr"` @@ -138,7 +144,7 @@ func (enclosure *rssEnclosure) Size() int64 { type
rssItem struct { GUID string `xml:"guid"` - Title string `xml:"title"` + Title []rssTitle `xml:"title"` Links []rssLink `xml:"link"` Description string `xml:"description"` PubDate string `xml:"pubDate"` @@ -223,7 +229,24 @@ func (r *rssItem) entryHash() string { } func (r *rssItem) entryTitle() string { - return strings.TrimSpace(sanitizer.StripTags(r.Title)) + var title string + + for _, rssTitle := range r.Title { + switch rssTitle.XMLName.Space { + case "http://search.yahoo.com/mrss/": + // Ignore title in media namespace + case "http://purl.org/dc/elements/1.1/": + title = rssTitle.Data + default: + title = rssTitle.Data + } + + if title != "" { + break + } + } + + return strings.TrimSpace(sanitizer.StripTags(title)) } func (r *rssItem) entryContent() string {