diff --git a/proxy/image_proxy.go b/proxy/image_proxy.go
index 07b087b9..e486654e 100644
--- a/proxy/image_proxy.go
+++ b/proxy/image_proxy.go
@@ -5,18 +5,16 @@
package proxy // import "miniflux.app/proxy"
import (
- "regexp"
"strings"
"miniflux.app/config"
+ "miniflux.app/reader/sanitizer"
"miniflux.app/url"
"github.com/PuerkitoBio/goquery"
"github.com/gorilla/mux"
)
-var regexSplitSrcset = regexp.MustCompile(`,\s+`)
-
// ImageProxyRewriter replaces image URLs with internal proxy URLs.
func ImageProxyRewriter(router *mux.Router, data string) string {
proxyImages := config.Opts.ProxyImages()
@@ -30,24 +28,20 @@ func ImageProxyRewriter(router *mux.Router, data string) string {
}
doc.Find("img").Each(func(i int, img *goquery.Selection) {
- if srcAttr, ok := img.Attr("src"); ok {
- if !isDataURL(srcAttr) && (proxyImages == "all" || !url.IsHTTPS(srcAttr)) {
- img.SetAttr("src", ProxifyURL(router, srcAttr))
+ if srcAttrValue, ok := img.Attr("src"); ok {
+ if !isDataURL(srcAttrValue) && (proxyImages == "all" || !url.IsHTTPS(srcAttrValue)) {
+ img.SetAttr("src", ProxifyURL(router, srcAttrValue))
}
}
- if srcsetAttr, ok := img.Attr("srcset"); ok {
- if proxyImages == "all" || !url.IsHTTPS(srcsetAttr) {
- proxifySourceSet(img, router, srcsetAttr)
- }
+ if srcsetAttrValue, ok := img.Attr("srcset"); ok {
+ proxifySourceSet(img, router, proxyImages, srcsetAttrValue)
}
})
doc.Find("picture source").Each(func(i int, sourceElement *goquery.Selection) {
- if srcsetAttr, ok := sourceElement.Attr("srcset"); ok {
- if proxyImages == "all" || !url.IsHTTPS(srcsetAttr) {
- proxifySourceSet(sourceElement, router, srcsetAttr)
- }
+ if srcsetAttrValue, ok := sourceElement.Attr("srcset"); ok {
+ proxifySourceSet(sourceElement, router, proxyImages, srcsetAttrValue)
}
})
@@ -59,30 +53,16 @@ func ImageProxyRewriter(router *mux.Router, data string) string {
return output
}
-func proxifySourceSet(element *goquery.Selection, router *mux.Router, attributeValue string) {
- var proxifiedSources []string
+func proxifySourceSet(element *goquery.Selection, router *mux.Router, proxyImages, srcsetAttrValue string) {
+ imageCandidates := sanitizer.ParseSrcSetAttribute(srcsetAttrValue)
- for _, source := range regexSplitSrcset.Split(attributeValue, -1) {
- parts := strings.Split(strings.TrimSpace(source), " ")
- nbParts := len(parts)
-
- if nbParts > 0 {
- rewrittenSource := parts[0]
- if !isDataURL(rewrittenSource) {
- rewrittenSource = ProxifyURL(router, rewrittenSource)
- }
-
- if nbParts > 1 {
- rewrittenSource += " " + parts[1]
- }
-
- proxifiedSources = append(proxifiedSources, rewrittenSource)
+ for _, imageCandidate := range imageCandidates {
+ if !isDataURL(imageCandidate.ImageURL) && (proxyImages == "all" || !url.IsHTTPS(imageCandidate.ImageURL)) {
+ imageCandidate.ImageURL = ProxifyURL(router, imageCandidate.ImageURL)
}
}
- if len(proxifiedSources) > 0 {
- element.SetAttr("srcset", strings.Join(proxifiedSources, ", "))
- }
+ element.SetAttr("srcset", imageCandidates.String())
}
func isDataURL(s string) bool {
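Because each candidate is now checked on its own, a mixed srcset is only partially rewritten when PROXY_IMAGES is "https". A rough sketch of the per-candidate logic, assuming a *mux.Router named router is in scope and using made-up URLs:

    // Only the plain-HTTP candidate is sent through the proxy; the HTTPS
    // candidate (and any data: URL) keeps its original value.
    srcsetAttrValue := `http://example.org/a.jpg 2x, https://example.org/b.jpg 640w`
    for _, candidate := range sanitizer.ParseSrcSetAttribute(srcsetAttrValue) {
        if !isDataURL(candidate.ImageURL) && !url.IsHTTPS(candidate.ImageURL) {
            candidate.ImageURL = ProxifyURL(router, candidate.ImageURL)
        }
    }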
diff --git a/proxy/image_proxy_test.go b/proxy/image_proxy_test.go
index 3b91c0fd..c658caef 100644
--- a/proxy/image_proxy_test.go
+++ b/proxy/image_proxy_test.go
@@ -234,8 +234,31 @@ func TestProxyFilterWithPictureSource(t *testing.T) {
r := mux.NewRouter()
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
- input := ``
- expected := ``
+ input := ``
+ expected := ``
+ output := ImageProxyRewriter(r, input)
+
+ if expected != output {
+ t.Errorf(`Not expected output: got %s`, output)
+ }
+}
+
+func TestProxyFilterOnlyNonHTTPWithPictureSource(t *testing.T) {
+ os.Clearenv()
+ os.Setenv("PROXY_IMAGES", "https")
+
+ var err error
+ parser := config.NewParser()
+ config.Opts, err = parser.ParseEnvironmentVariables()
+ if err != nil {
+ t.Fatalf(`Parsing failure: %v`, err)
+ }
+
+ r := mux.NewRouter()
+ r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
+
+ input := ``
+ expected := ``
output := ImageProxyRewriter(r, input)
if expected != output {
diff --git a/reader/sanitizer/sanitizer.go b/reader/sanitizer/sanitizer.go
index 92ae869e..f133247b 100644
--- a/reader/sanitizer/sanitizer.go
+++ b/reader/sanitizer/sanitizer.go
@@ -20,7 +20,6 @@ import (
var (
youtubeEmbedRegex = regexp.MustCompile(`//www\.youtube\.com/embed/(.*)`)
- splitSrcsetRegex = regexp.MustCompile(`,\s?`)
)
// Sanitize returns safe HTML.
@@ -447,52 +446,17 @@ func isBlockedTag(tagName string) bool {
return false
}
-/*
-
-One or more strings separated by commas, indicating possible image sources for the user agent to use.
-
-Each string is composed of:
-- A URL to an image
-- Optionally, whitespace followed by one of:
-- A width descriptor (a positive integer directly followed by w). The width descriptor is divided by the source size given in the sizes attribute to calculate the effective pixel density.
-- A pixel density descriptor (a positive floating point number directly followed by x).
-
-*/
func sanitizeSrcsetAttr(baseURL, value string) string {
- var sanitizedSources []string
- rawSources := splitSrcsetRegex.Split(value, -1)
- for _, rawSource := range rawSources {
- parts := strings.Split(strings.TrimSpace(rawSource), " ")
- nbParts := len(parts)
+ imageCandidates := ParseSrcSetAttribute(value)
- if nbParts > 0 {
- sanitizedSource, err := url.AbsoluteURL(baseURL, parts[0])
- if err != nil {
- continue
- }
-
- if nbParts == 2 && isValidWidthOrDensityDescriptor(parts[1]) {
- sanitizedSource += " " + parts[1]
- }
-
- sanitizedSources = append(sanitizedSources, sanitizedSource)
+ for _, imageCandidate := range imageCandidates {
+ absoluteURL, err := url.AbsoluteURL(baseURL, imageCandidate.ImageURL)
+ if err == nil {
+ imageCandidate.ImageURL = absoluteURL
}
}
- return strings.Join(sanitizedSources, ", ")
-}
-func isValidWidthOrDensityDescriptor(value string) bool {
- if value == "" {
- return false
- }
-
- lastChar := value[len(value)-1:]
- if lastChar != "w" && lastChar != "x" {
- return false
- }
-
- _, err := strconv.ParseFloat(value[0:len(value)-1], 32)
- return err == nil
+ return imageCandidates.String()
}
func isValidDataAttribute(value string) bool {
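With the shared parser, sanitizeSrcsetAttr only has to resolve each candidate URL against the base URL; candidates with malformed descriptors are already dropped during parsing. A minimal sketch of the effect, using the package's url.AbsoluteURL helper and an illustrative base URL:

    candidates := ParseSrcSetAttribute(`image-640w.jpg 640w, image-1280w.jpg 2x`)
    for _, candidate := range candidates {
        // Relative candidate URLs become absolute; descriptors are kept as-is.
        if absoluteURL, err := url.AbsoluteURL("http://example.org/", candidate.ImageURL); err == nil {
            candidate.ImageURL = absoluteURL
        }
    }
    // candidates.String() then reads:
    // "http://example.org/image-640w.jpg 640w, http://example.org/image-1280w.jpg 2x"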
diff --git a/reader/sanitizer/sanitizer_test.go b/reader/sanitizer/sanitizer_test.go
index aee7ba4e..695c1e49 100644
--- a/reader/sanitizer/sanitizer_test.go
+++ b/reader/sanitizer/sanitizer_test.go
@@ -85,16 +85,6 @@ func TestMediumImgWithSrcset(t *testing.T) {
}
}
-func TestEconomistImgWithSrcset(t *testing.T) {
- input := `
`
- expected := `
`
- output := Sanitize("http://example.org/", input)
-
- if output != expected {
- t.Errorf(`Wrong output: %s`, output)
- }
-}
-
func TestSelfClosingTags(t *testing.T) {
input := `
This
is a text
with an image:
.
`
output := Sanitize("http://example.org/", input)
diff --git a/reader/sanitizer/srcset.go b/reader/sanitizer/srcset.go
new file mode 100644
index 00000000..758d8d5c
--- /dev/null
+++ b/reader/sanitizer/srcset.go
@@ -0,0 +1,82 @@
+// Copyright 2022 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package sanitizer
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+type ImageCandidate struct {
+ ImageURL string
+ Descriptor string
+}
+
+type ImageCandidates []*ImageCandidate
+
+func (c ImageCandidates) String() string {
+ var htmlCandidates []string
+
+ for _, imageCandidate := range c {
+ var htmlCandidate string
+ if imageCandidate.Descriptor != "" {
+ htmlCandidate = fmt.Sprintf(`%s %s`, imageCandidate.ImageURL, imageCandidate.Descriptor)
+ } else {
+ htmlCandidate = imageCandidate.ImageURL
+ }
+
+ htmlCandidates = append(htmlCandidates, htmlCandidate)
+ }
+
+ return strings.Join(htmlCandidates, ", ")
+}
+
+// ParseSrcSetAttribute returns the list of image candidates parsed from a srcset attribute value.
+// https://html.spec.whatwg.org/#parse-a-srcset-attribute
+func ParseSrcSetAttribute(attributeValue string) (imageCandidates ImageCandidates) {
+ unparsedCandidates := strings.Split(attributeValue, ", ")
+
+ for _, unparsedCandidate := range unparsedCandidates {
+ if candidate, err := parseImageCandidate(unparsedCandidate); err == nil {
+ imageCandidates = append(imageCandidates, candidate)
+ }
+ }
+
+ return imageCandidates
+}
+
+func parseImageCandidate(input string) (*ImageCandidate, error) {
+ input = strings.TrimSpace(input)
+ parts := strings.Split(input, " ")
+ nbParts := len(parts)
+
+ if nbParts > 2 || nbParts == 0 {
+ return nil, fmt.Errorf(`srcset: invalid number of descriptors`)
+ }
+
+ if nbParts == 2 {
+ if !isValidWidthOrDensityDescriptor(parts[1]) {
+ return nil, fmt.Errorf(`srcset: invalid descriptor`)
+ }
+ return &ImageCandidate{ImageURL: parts[0], Descriptor: parts[1]}, nil
+ }
+
+ return &ImageCandidate{ImageURL: parts[0]}, nil
+}
+
+func isValidWidthOrDensityDescriptor(value string) bool {
+ if value == "" {
+ return false
+ }
+
+ lastChar := value[len(value)-1:]
+ if lastChar != "w" && lastChar != "x" {
+ return false
+ }
+
+ _, err := strconv.ParseFloat(value[0:len(value)-1], 32)
+ return err == nil
+}
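A minimal usage sketch of the new parser from outside the package (the candidate URLs and CDN prefix are illustrative):

    candidates := sanitizer.ParseSrcSetAttribute(`small.jpg 480w, large.jpg 2x`)
    for _, candidate := range candidates {
        // Candidates are pointers, so rewriting ImageURL in place is enough;
        // the descriptor ("480w", "2x") is carried through untouched.
        candidate.ImageURL = "https://cdn.example.org/" + candidate.ImageURL
    }
    // candidates.String() reassembles the attribute:
    // "https://cdn.example.org/small.jpg 480w, https://cdn.example.org/large.jpg 2x"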
diff --git a/reader/sanitizer/srcset_test.go b/reader/sanitizer/srcset_test.go
new file mode 100644
index 00000000..4f19f9c3
--- /dev/null
+++ b/reader/sanitizer/srcset_test.go
@@ -0,0 +1,85 @@
+// Copyright 2022 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package sanitizer
+
+import "testing"
+
+func TestParseSrcSetAttributeWithRelativeURLs(t *testing.T) {
+ input := `example-320w.jpg, example-480w.jpg 1.5x, example-640,w.jpg 2x, example-640w.jpg 640w`
+ candidates := ParseSrcSetAttribute(input)
+
+ if len(candidates) != 4 {
+ t.Error(`Incorrect number of candidates`)
+ }
+
+ if candidates.String() != `example-320w.jpg, example-480w.jpg 1.5x, example-640,w.jpg 2x, example-640w.jpg 640w` {
+ t.Errorf(`Unexpected string output`)
+ }
+}
+
+func TestParseSrcSetAttributeWithAbsoluteURLs(t *testing.T) {
+ input := `http://example.org/example-320w.jpg 320w, http://example.org/example-480w.jpg 1.5x`
+ candidates := ParseSrcSetAttribute(input)
+
+ if len(candidates) != 2 {
+ t.Error(`Incorrect number of candidates`)
+ }
+
+ if candidates.String() != `http://example.org/example-320w.jpg 320w, http://example.org/example-480w.jpg 1.5x` {
+ t.Errorf(`Unexpected string output`)
+ }
+}
+
+func TestParseSrcSetAttributeWithOneCandidate(t *testing.T) {
+ input := `http://example.org/example-320w.jpg`
+ candidates := ParseSrcSetAttribute(input)
+
+ if len(candidates) != 1 {
+ t.Error(`Incorrect number of candidates`)
+ }
+
+ if candidates.String() != `http://example.org/example-320w.jpg` {
+ t.Errorf(`Unexpected string output`)
+ }
+}
+
+func TestParseSrcSetAttributeWithCommaURL(t *testing.T) {
+ input := `http://example.org/example,a:b/d.jpg , example-480w.jpg 1.5x`
+ candidates := ParseSrcSetAttribute(input)
+
+ if len(candidates) != 2 {
+ t.Error(`Incorrect number of candidates`)
+ }
+
+ if candidates.String() != `http://example.org/example,a:b/d.jpg, example-480w.jpg 1.5x` {
+ t.Errorf(`Unexpected string output`)
+ }
+}
+
+func TestParseSrcSetAttributeWithIncorrectDescriptor(t *testing.T) {
+ input := `http://example.org/example-320w.jpg test`
+ candidates := ParseSrcSetAttribute(input)
+
+ if len(candidates) != 0 {
+ t.Error(`Incorrect number of candidates`)
+ }
+
+ if candidates.String() != `` {
+ t.Errorf(`Unexpected string output`)
+ }
+}
+
+func TestParseSrcSetAttributeWithTooManyDescriptors(t *testing.T) {
+ input := `http://example.org/example-320w.jpg 10w 1x`
+ candidates := ParseSrcSetAttribute(input)
+
+ if len(candidates) != 0 {
+ t.Error(`Incorrect number of candidates`)
+ }
+
+ if candidates.String() != `` {
+ t.Errorf(`Unexpected string output`)
+ }
+}