diff --git a/reader/readability/readability.go b/reader/readability/readability.go
index d58ff221..b9891a19 100644
--- a/reader/readability/readability.go
+++ b/reader/readability/readability.go
@@ -76,7 +76,7 @@ func ExtractContent(page io.Reader) (string, error) {
return "", err
}
- document.Find("script,style,noscript").Each(func(i int, s *goquery.Selection) {
+ document.Find("script,style").Each(func(i int, s *goquery.Selection) {
removeNodes(s)
})
diff --git a/reader/rewrite/rewrite_functions.go b/reader/rewrite/rewrite_functions.go
index 55016a71..ea162af5 100644
--- a/reader/rewrite/rewrite_functions.go
+++ b/reader/rewrite/rewrite_functions.go
@@ -139,6 +139,21 @@ func addDynamicImage(entryURL, entryContent string) string {
return entryContent
}
+// fixMediumImages rewrites every "figure.paragraph-image" element found in
+// entryContent: the figure is replaced by the text content of its <noscript>
+// descendant, inserted as HTML. It backs the "fix_medium_images" rewrite rule.
+//
+// entryURL is accepted for signature parity with the other rewrite functions
+// and is not used. The returned string is the inner HTML of the parsed
+// document's <body>. If entryContent cannot be parsed or the result cannot be
+// serialized, entryContent is returned unchanged.
+func fixMediumImages(entryURL, entryContent string) string {
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
+	if err != nil {
+		return entryContent
+	}
+
+	doc.Find("figure.paragraph-image").Each(func(i int, paragraphImage *goquery.Selection) {
+		noscriptElement := paragraphImage.Find("noscript")
+		// A figure without a <noscript> fallback has nothing to substitute;
+		// replacing it with an empty string would silently drop the figure,
+		// so it is left untouched.
+		if noscriptElement.Length() > 0 {
+			paragraphImage.ReplaceWithHtml(noscriptElement.Text())
+		}
+	})
+
+	output, err := doc.Find("body").First().Html()
+	if err != nil {
+		// Prefer the unmodified content over an empty entry.
+		return entryContent
+	}
+	return output
+}
+
func addYoutubeVideo(entryURL, entryContent string) string {
matches := youtubeRegex.FindStringSubmatch(entryURL)
diff --git a/reader/rewrite/rewriter.go b/reader/rewrite/rewriter.go
index a34b0adf..8c26719c 100644
--- a/reader/rewrite/rewriter.go
+++ b/reader/rewrite/rewriter.go
@@ -43,6 +43,8 @@ func Rewriter(entryURL, entryContent, customRewriteRules string) string {
entryContent = replaceLineFeeds(entryContent)
case "convert_text_link", "convert_text_links":
entryContent = replaceTextLinks(entryContent)
+ case "fix_medium_images":
+ entryContent = fixMediumImages(entryURL, entryContent)
}
}
diff --git a/reader/rewrite/rewriter_test.go b/reader/rewrite/rewriter_test.go
index 8c0db6cf..04f4c657 100644
--- a/reader/rewrite/rewriter_test.go
+++ b/reader/rewrite/rewriter_test.go
@@ -4,7 +4,10 @@
package rewrite // import "miniflux.app/reader/rewrite"
-import "testing"
+import (
+ "strings"
+ "testing"
+)
func TestReplaceTextLinks(t *testing.T) {
scenarios := map[string]string{
@@ -176,3 +179,32 @@ func TestConvertTextLinkRewriteRule(t *testing.T) {
t.Errorf(`Not expected output: got %q instead of %q`, output, expected)
}
}
+
+// TestMediumImage runs the "fix_medium_images" rewrite rule through Rewriter
+// and compares the whitespace-trimmed output with the expected markup.
+//
+// NOTE(review): both the fixture and the expected value look empty in this
+// patch; confirm the HTML markup was not lost when the diff was produced.
+func TestMediumImage(t *testing.T) {
+	content := `
+
+ `
+	expected := ``
+	output := Rewriter("https://example.org/article", content, "fix_medium_images")
+	output = strings.TrimSpace(output)
+
+	if expected != output {
+		// Same failure format as the other rewrite tests: show both values, quoted.
+		t.Errorf(`Not expected output: got %q instead of %q`, output, expected)
+	}
+}
diff --git a/reader/rewrite/rules.go b/reader/rewrite/rules.go
index fb644a48..b954b765 100644
--- a/reader/rewrite/rules.go
+++ b/reader/rewrite/rules.go
@@ -30,4 +30,5 @@ var predefinedRules = map[string]string{
"invidio.us": "add_invidious_video",
"xkcd.com": "add_image_title",
"framatube.org": "nl2br,convert_text_link",
+ "medium.com": "fix_medium_images",
}
diff --git a/reader/scraper/rules.go b/reader/scraper/rules.go
index a8dda7d8..e0f2f582 100644
--- a/reader/scraper/rules.go
+++ b/reader/scraper/rules.go
@@ -7,43 +7,42 @@ package scraper // import "miniflux.app/reader/scraper"
// List of predefined scraper rules (alphabetically sorted)
// domain => CSS selectors
var predefinedRules = map[string]string{
- "bbc.co.uk": "div.vxp-column--single, div.story-body__inner, ul.gallery-images__list",
- "cbc.ca": ".story-content",
- "darkreading.com": "#article-main:not(header)",
- "developpez.com": "div[itemprop=articleBody]",
- "dilbert.com": "span.comic-title-name, img.img-comic",
+ "bbc.co.uk": "div.vxp-column--single, div.story-body__inner, ul.gallery-images__list",
+ "cbc.ca": ".story-content",
+ "darkreading.com": "#article-main:not(header)",
+ "developpez.com": "div[itemprop=articleBody]",
+ "dilbert.com": "span.comic-title-name, img.img-comic",
"financialsamurai.com": "article",
- "francetvinfo.fr": ".text",
- "github.com": "article.entry-content",
- "heise.de": "header .article-content__lead, header .article-image, div.article-layout__content.article-content",
- "igen.fr": "section.corps",
- "ing.dk": "section.body",
- "lapresse.ca": ".amorce, .entry",
- "lemonde.fr": "article",
- "lepoint.fr": ".art-text",
- "lesjoiesducode.fr": ".blog-post-content img",
- "lesnumeriques.com": ".text",
- "linux.com": "div.content, div[property]",
- "medium.com": ".section-content",
- "mac4ever.com": "div[itemprop=articleBody]",
- "monwindows.com": ".blog-post-body",
- "npr.org": "#storytext",
- "oneindia.com": ".io-article-body",
- "opensource.com": "div[property]",
- "osnews.com": "div.newscontent1",
- "phoronix.com": "div.content",
- "pseudo-sciences.org": "#art_main",
- "raywenderlich.com": "article",
- "slate.fr": ".field-items",
- "techcrunch.com": "div.article-entry",
- "theoatmeal.com": "div#comic",
- "theregister.co.uk": "#body",
- "turnoff.us": "article.post-content",
- "universfreebox.com": "#corps_corps",
- "version2.dk": "section.body",
- "wdwnt.com": "div.entry-content",
- "wired.com": "main figure, article",
- "zeit.de": ".summary, .article-body",
- "zdnet.com": "div.storyBody",
- "openingsource.org": "article.suxing-popup-gallery",
+ "francetvinfo.fr": ".text",
+ "github.com": "article.entry-content",
+ "heise.de": "header .article-content__lead, header .article-image, div.article-layout__content.article-content",
+ "igen.fr": "section.corps",
+ "ing.dk": "section.body",
+ "lapresse.ca": ".amorce, .entry",
+ "lemonde.fr": "article",
+ "lepoint.fr": ".art-text",
+ "lesjoiesducode.fr": ".blog-post-content img",
+ "lesnumeriques.com": ".text",
+ "linux.com": "div.content, div[property]",
+ "mac4ever.com": "div[itemprop=articleBody]",
+ "monwindows.com": ".blog-post-body",
+ "npr.org": "#storytext",
+ "oneindia.com": ".io-article-body",
+ "opensource.com": "div[property]",
+ "osnews.com": "div.newscontent1",
+ "phoronix.com": "div.content",
+ "pseudo-sciences.org": "#art_main",
+ "raywenderlich.com": "article",
+ "slate.fr": ".field-items",
+ "techcrunch.com": "div.article-entry",
+ "theoatmeal.com": "div#comic",
+ "theregister.co.uk": "#body",
+ "turnoff.us": "article.post-content",
+ "universfreebox.com": "#corps_corps",
+ "version2.dk": "section.body",
+ "wdwnt.com": "div.entry-content",
+ "wired.com": "main figure, article",
+ "zeit.de": ".summary, .article-body",
+ "zdnet.com": "div.storyBody",
+ "openingsource.org": "article.suxing-popup-gallery",
}