From 138fd926ee0030457d0628995c952c83dfaf436f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= <f@miniflux.net>
Date: Sun, 11 Sep 2022 22:32:16 -0700
Subject: [PATCH] Do not convert anchors to absolute links

---
 reader/sanitizer/sanitizer.go      | 44 ++++++++++++++++++------------
 reader/sanitizer/sanitizer_test.go | 10 +++++++
 2 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/reader/sanitizer/sanitizer.go b/reader/sanitizer/sanitizer.go
index ad7afbca..b9b4510c 100644
--- a/reader/sanitizer/sanitizer.go
+++ b/reader/sanitizer/sanitizer.go
@@ -101,6 +101,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
 	var htmlAttrs, attrNames []string
 	var err error
 	var isImageLargerThanLayout bool
+	var isAnchorLink bool
 
 	if tagName == "img" {
 		imgWidth := getIntegerAttributeValue("width", attributes)
@@ -137,6 +138,9 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
 				}
 			} else if tagName == "img" && attribute.Key == "src" && isValidDataAttribute(attribute.Val) {
 				value = attribute.Val
+			} else if isAnchor(tagName, attribute) {
+				value = attribute.Val // keep the "#..." href as written instead of resolving it against baseURL
+				isAnchorLink = true   // read further down to skip getExtraAttributes for this tag
 			} else {
 				value, err = url.AbsoluteURL(baseURL, value)
 				if err != nil {
@@ -153,10 +157,12 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
 		htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value)))
 	}
 
-	extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName)
-	if len(extraAttrNames) > 0 {
-		attrNames = append(attrNames, extraAttrNames...)
-		htmlAttrs = append(htmlAttrs, extraHTMLAttributes...)
+	if !isAnchorLink {
+		extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName)
+		if len(extraAttrNames) > 0 {
+			attrNames = append(attrNames, extraAttrNames...)
+			htmlAttrs = append(htmlAttrs, extraHTMLAttributes...)
+		}
 	}
 
 	return attrNames, strings.Join(htmlAttrs, " ")
@@ -370,9 +376,9 @@ func getTagAllowList() map[string][]string {
 	whitelist["audio"] = []string{"src"}
 	whitelist["video"] = []string{"poster", "height", "width", "src"}
 	whitelist["source"] = []string{"src", "type", "srcset", "sizes", "media"}
-	whitelist["dt"] = []string{}
-	whitelist["dd"] = []string{}
-	whitelist["dl"] = []string{}
+	whitelist["dt"] = []string{"id"}
+	whitelist["dd"] = []string{"id"}
+	whitelist["dl"] = []string{"id"}
 	whitelist["table"] = []string{}
 	whitelist["caption"] = []string{}
 	whitelist["thead"] = []string{}
@@ -380,12 +386,12 @@ func getTagAllowList() map[string][]string {
 	whitelist["tr"] = []string{}
 	whitelist["td"] = []string{"rowspan", "colspan"}
 	whitelist["th"] = []string{"rowspan", "colspan"}
-	whitelist["h1"] = []string{}
-	whitelist["h2"] = []string{}
-	whitelist["h3"] = []string{}
-	whitelist["h4"] = []string{}
-	whitelist["h5"] = []string{}
-	whitelist["h6"] = []string{}
+	whitelist["h1"] = []string{"id"}
+	whitelist["h2"] = []string{"id"}
+	whitelist["h3"] = []string{"id"}
+	whitelist["h4"] = []string{"id"}
+	whitelist["h5"] = []string{"id"}
+	whitelist["h6"] = []string{"id"}
 	whitelist["strong"] = []string{}
 	whitelist["em"] = []string{}
 	whitelist["code"] = []string{}
@@ -393,12 +399,12 @@ func getTagAllowList() map[string][]string {
 	whitelist["blockquote"] = []string{}
 	whitelist["q"] = []string{"cite"}
 	whitelist["p"] = []string{}
-	whitelist["ul"] = []string{}
-	whitelist["li"] = []string{}
-	whitelist["ol"] = []string{}
+	whitelist["ul"] = []string{"id"}
+	whitelist["li"] = []string{"id"}
+	whitelist["ol"] = []string{"id"}
 	whitelist["br"] = []string{}
 	whitelist["del"] = []string{}
-	whitelist["a"] = []string{"href", "title"}
+	whitelist["a"] = []string{"href", "title", "id"}
 	whitelist["figure"] = []string{}
 	whitelist["figcaption"] = []string{}
 	whitelist["cite"] = []string{}
@@ -492,6 +498,15 @@ func isValidDataAttribute(value string) bool {
 	return false
 }
 
+// isAnchor reports whether attribute is an href on an <a> tag whose value
+// begins with "#".
+func isAnchor(tagName string, attribute html.Attribute) bool {
+	if tagName != "a" || attribute.Key != "href" {
+		return false
+	}
+	return strings.HasPrefix(attribute.Val, "#")
+}
+
 func isPositiveInteger(value string) bool {
 	if number, err := strconv.Atoi(value); err == nil {
 		return number > 0
diff --git a/reader/sanitizer/sanitizer_test.go b/reader/sanitizer/sanitizer_test.go
index 74452590..65961875 100644
--- a/reader/sanitizer/sanitizer_test.go
+++ b/reader/sanitizer/sanitizer_test.go
@@ -203,6 +203,16 @@ func TestIFrameWithChildElements(t *testing.T) {
 	}
 }
 
+// TestAnchorLink verifies that Sanitize returns markup containing a
+// fragment-only link ("#some-anchor") unchanged.
+func TestAnchorLink(t *testing.T) {
+	const markup = `<p>This link is <a href="#some-anchor">an anchor</a></p>`
+
+	if got := Sanitize("http://example.org/", markup); got != markup {
+		t.Errorf(`Wrong output: "%s" != "%s"`, markup, got)
+	}
+}
+
 func TestInvalidURLScheme(t *testing.T) {
 	input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
 	expected := `<p>This link is not valid</p>`