From aaa1625724ce17147a0b7939aad8913519ac5071 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= <f@miniflux.net>
Date: Sun, 26 Feb 2023 17:09:50 -0800
Subject: [PATCH] Ignore empty link when discovering feeds

---
 reader/subscription/finder.go      |   6 +-
 reader/subscription/finder_test.go | 291 ++++++++++++++++++++++++++++-
 2 files changed, 293 insertions(+), 4 deletions(-)
diff --git a/reader/subscription/finder.go b/reader/subscription/finder.go
index 718116d1..77eba185 100644
--- a/reader/subscription/finder.go
+++ b/reader/subscription/finder.go
@@ -87,12 +87,12 @@ func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.Loc
 
 			if title, exists := s.Attr("title"); exists {
 				subscription.Title = title
-			} else {
-				subscription.Title = "Feed"
 			}
 
 			if feedURL, exists := s.Attr("href"); exists {
-				subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL)
+				if feedURL != "" {
+					subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL)
+				}
 			}
 
 			if subscription.Title == "" {
diff --git a/reader/subscription/finder_test.go b/reader/subscription/finder_test.go
index f4531ea2..c7010846 100644
--- a/reader/subscription/finder_test.go
+++ b/reader/subscription/finder_test.go
@@ -4,7 +4,10 @@
 
 package subscription
 
-import "testing"
+import (
+	"strings"
+	"testing"
+)
 
 func TestFindYoutubeChannelFeed(t *testing.T) {
 	scenarios := map[string]string{
@@ -19,3 +22,289 @@ func TestFindYoutubeChannelFeed(t *testing.T) {
 		}
 	}
 }
+
+// TestParseWebPageWithRssFeed verifies that a <link> of type application/rss+xml
+// yields exactly one subscription of type "rss" carrying the link's title and href.
+func TestParseWebPageWithRssFeed(t *testing.T) {
+	page := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	found, err := parseWebPage("http://example.org/", strings.NewReader(page))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+	if len(found) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	feed := found[0]
+	if feed.Title != "Some Title" {
+		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
+	}
+	if feed.URL != "http://example.org/rss" {
+		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
+	}
+	if feed.Type != "rss" {
+		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
+	}
+}
+
+// TestParseWebPageWithAtomFeed verifies that a <link> of type application/atom+xml
+// yields exactly one subscription of type "atom" carrying the link's title and href.
+func TestParseWebPageWithAtomFeed(t *testing.T) {
+	page := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	found, err := parseWebPage("http://example.org/", strings.NewReader(page))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+	if len(found) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	feed := found[0]
+	if feed.Title != "Some Title" {
+		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
+	}
+	if feed.URL != "http://example.org/atom.xml" {
+		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
+	}
+	if feed.Type != "atom" {
+		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
+	}
+}
+
+// TestParseWebPageWithJSONFeed verifies that a <link> of type application/feed+json
+// yields exactly one subscription of type "json" carrying the link's title and href.
+func TestParseWebPageWithJSONFeed(t *testing.T) {
+	page := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	found, err := parseWebPage("http://example.org/", strings.NewReader(page))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+	if len(found) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	feed := found[0]
+	if feed.Title != "Some Title" {
+		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
+	}
+	if feed.URL != "http://example.org/feed.json" {
+		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
+	}
+	if feed.Type != "json" {
+		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
+	}
+}
+
+// TestParseWebPageWithOldJSONFeedMimeType verifies that a <link> using the plain
+// application/json type is still reported as one subscription of type "json".
+func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
+	page := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	found, err := parseWebPage("http://example.org/", strings.NewReader(page))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+	if len(found) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	feed := found[0]
+	if feed.Title != "Some Title" {
+		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
+	}
+	if feed.URL != "http://example.org/feed.json" {
+		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
+	}
+	if feed.Type != "json" {
+		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
+	}
+}
+
+// TestParseWebPageWithRelativeFeedURL verifies that a relative href ("/feed.json")
+// is returned as an absolute URL built from the website URL given to parseWebPage.
+func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
+	page := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	found, err := parseWebPage("http://example.org/", strings.NewReader(page))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+	if len(found) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	feed := found[0]
+	if feed.Title != "Some Title" {
+		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
+	}
+	if feed.URL != "http://example.org/feed.json" {
+		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
+	}
+	if feed.Type != "json" {
+		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
+	}
+}
+
+// TestParseWebPageWithEmptyTitle verifies that a feed <link> without a title
+// attribute gets the absolute feed URL as its subscription title.
+func TestParseWebPageWithEmptyTitle(t *testing.T) {
+	page := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="/feed.json" rel="alternate" type="application/feed+json">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	found, err := parseWebPage("http://example.org/", strings.NewReader(page))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+	if len(found) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	feed := found[0]
+	if feed.Title != "http://example.org/feed.json" {
+		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
+	}
+	if feed.URL != "http://example.org/feed.json" {
+		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
+	}
+	if feed.Type != "json" {
+		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
+	}
+}
+
+// TestParseWebPageWithMultipleFeeds verifies that every feed <link> in the page
+// is returned as its own subscription, in the order the tags appear.
+func TestParseWebPageWithMultipleFeeds(t *testing.T) {
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
+			<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="JSON Feed">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 2 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	// First <link>: the Atom feed.
+	if subscriptions[0].Title != "Atom Feed" {
+		t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
+	}
+	if subscriptions[0].URL != "http://example.org/atom.xml" {
+		t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
+	}
+	if subscriptions[0].Type != "atom" {
+		t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
+	}
+
+	// Second <link>: the JSON feed. Failures report entry 1, the one being checked.
+	if subscriptions[1].Title != "JSON Feed" {
+		t.Errorf(`Incorrect subscription title: %q`, subscriptions[1].Title)
+	}
+	if subscriptions[1].URL != "http://example.org/feed.json" {
+		t.Errorf(`Incorrect subscription URL: %q`, subscriptions[1].URL)
+	}
+	if subscriptions[1].Type != "json" {
+		t.Errorf(`Incorrect subscription type: %q`, subscriptions[1].Type)
+	}
+}
+
+// TestParseWebPageWithEmptyFeedURL verifies that a feed <link> whose href has no value is ignored.
+func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
+	page := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href rel="alternate" type="application/feed+json" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	found, err := parseWebPage("http://example.org/", strings.NewReader(page))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+	if count := len(found); count != 0 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+}
+
+// TestParseWebPageWithNoHref verifies that a feed <link> lacking an href attribute is ignored.
+func TestParseWebPageWithNoHref(t *testing.T) {
+	page := `
+	<!doctype html>
+	<html>
+		<head>
+			<link rel="alternate" type="application/feed+json" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	found, err := parseWebPage("http://example.org/", strings.NewReader(page))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+	if count := len(found); count != 0 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+}