From aaa1625724ce17147a0b7939aad8913519ac5071 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Sun, 26 Feb 2023 17:09:50 -0800 Subject: [PATCH] Ignore empty link when discovering feeds --- reader/subscription/finder.go | 6 +- reader/subscription/finder_test.go | 291 ++++++++++++++++++++++++++++- 2 files changed, 293 insertions(+), 4 deletions(-) diff --git a/reader/subscription/finder.go b/reader/subscription/finder.go index 718116d1..77eba185 100644 --- a/reader/subscription/finder.go +++ b/reader/subscription/finder.go @@ -87,12 +87,12 @@ func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.Loc if title, exists := s.Attr("title"); exists { subscription.Title = title - } else { - subscription.Title = "Feed" } if feedURL, exists := s.Attr("href"); exists { - subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL) + if feedURL != "" { + subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL) + } } if subscription.Title == "" { diff --git a/reader/subscription/finder_test.go b/reader/subscription/finder_test.go index f4531ea2..c7010846 100644 --- a/reader/subscription/finder_test.go +++ b/reader/subscription/finder_test.go @@ -4,7 +4,10 @@ package subscription -import "testing" +import ( + "strings" + "testing" +) func TestFindYoutubeChannelFeed(t *testing.T) { scenarios := map[string]string{ @@ -19,3 +22,289 @@ func TestFindYoutubeChannelFeed(t *testing.T) { } } } + +func TestParseWebPageWithRssFeed(t *testing.T) { + htmlPage := ` + + + + + + + + ` + + subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage)) + if err != nil { + t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err) + } + + if len(subscriptions) != 1 { + t.Fatal(`Incorrect number of subscriptions returned`) + } + + if subscriptions[0].Title != "Some Title" { + t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title) + } + + if subscriptions[0].URL != 
"http://example.org/rss" { + t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL) + } + + if subscriptions[0].Type != "rss" { + t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type) + } +} + +func TestParseWebPageWithAtomFeed(t *testing.T) { + htmlPage := ` + + + + + + + + ` + + subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage)) + if err != nil { + t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err) + } + + if len(subscriptions) != 1 { + t.Fatal(`Incorrect number of subscriptions returned`) + } + + if subscriptions[0].Title != "Some Title" { + t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title) + } + + if subscriptions[0].URL != "http://example.org/atom.xml" { + t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL) + } + + if subscriptions[0].Type != "atom" { + t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type) + } +} + +func TestParseWebPageWithJSONFeed(t *testing.T) { + htmlPage := ` + + + + + + + + ` + + subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage)) + if err != nil { + t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err) + } + + if len(subscriptions) != 1 { + t.Fatal(`Incorrect number of subscriptions returned`) + } + + if subscriptions[0].Title != "Some Title" { + t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title) + } + + if subscriptions[0].URL != "http://example.org/feed.json" { + t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL) + } + + if subscriptions[0].Type != "json" { + t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type) + } +} + +func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) { + htmlPage := ` + + + + + + + + ` + + subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage)) + if err != nil { + t.Fatalf(`Parsing a correctly formatted HTML page should 
not return any error: %v`, err) + } + + if len(subscriptions) != 1 { + t.Fatal(`Incorrect number of subscriptions returned`) + } + + if subscriptions[0].Title != "Some Title" { + t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title) + } + + if subscriptions[0].URL != "http://example.org/feed.json" { + t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL) + } + + if subscriptions[0].Type != "json" { + t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type) + } +} + +func TestParseWebPageWithRelativeFeedURL(t *testing.T) { + htmlPage := ` + + + + + + + + ` + + subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage)) + if err != nil { + t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err) + } + + if len(subscriptions) != 1 { + t.Fatal(`Incorrect number of subscriptions returned`) + } + + if subscriptions[0].Title != "Some Title" { + t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title) + } + + if subscriptions[0].URL != "http://example.org/feed.json" { + t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL) + } + + if subscriptions[0].Type != "json" { + t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type) + } +} + +func TestParseWebPageWithEmptyTitle(t *testing.T) { + htmlPage := ` + + + + + + + + ` + + subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage)) + if err != nil { + t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err) + } + + if len(subscriptions) != 1 { + t.Fatal(`Incorrect number of subscriptions returned`) + } + + if subscriptions[0].Title != "http://example.org/feed.json" { + t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title) + } + + if subscriptions[0].URL != "http://example.org/feed.json" { + t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL) + } + + if subscriptions[0].Type != "json" { + t.Errorf(`Incorrect 
subscription type: %q`, subscriptions[0].Type) + } +} + +func TestParseWebPageWithMultipleFeeds(t *testing.T) { + htmlPage := ` + + + + + + + + + ` + + subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage)) + if err != nil { + t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err) + } + + if len(subscriptions) != 2 { + t.Fatal(`Incorrect number of subscriptions returned`) + } + + if subscriptions[0].Title != "Atom Feed" { + t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title) + } + + if subscriptions[0].URL != "http://example.org/atom.xml" { + t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL) + } + + if subscriptions[0].Type != "atom" { + t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type) + } + + if subscriptions[1].Title != "JSON Feed" { + t.Errorf(`Incorrect subscription title: %q`, subscriptions[1].Title) + } + + if subscriptions[1].URL != "http://example.org/feed.json" { + t.Errorf(`Incorrect subscription URL: %q`, subscriptions[1].URL) + } + + if subscriptions[1].Type != "json" { + t.Errorf(`Incorrect subscription type: %q`, subscriptions[1].Type) + } +} + +func TestParseWebPageWithEmptyFeedURL(t *testing.T) { + htmlPage := ` + + + + + + + + ` + + subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage)) + if err != nil { + t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err) + } + + if len(subscriptions) != 0 { + t.Fatal(`Incorrect number of subscriptions returned`) + } +} + +func TestParseWebPageWithNoHref(t *testing.T) { + htmlPage := ` + + + + + + + + ` + + subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage)) + if err != nil { + t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err) + } + + if len(subscriptions) != 0 { + t.Fatal(`Incorrect number of subscriptions returned`) + } +}