From 33c4b5188c1ae24d8fdea4261728bef1768fb912 Mon Sep 17 00:00:00 2001 From: Romain de Laage Date: Sat, 8 Apr 2023 11:02:36 +0200 Subject: [PATCH] Add a rewrite rule to remove clickbait titles --- reader/processor/processor.go | 8 +- reader/rewrite/rewrite_functions.go | 14 + reader/rewrite/rewriter.go | 51 ++- reader/rewrite/rewriter_test.go | 476 ++++++++++++++++++++-------- 4 files changed, 379 insertions(+), 170 deletions(-) diff --git a/reader/processor/processor.go b/reader/processor/processor.go index 67f50b28..a5a26086 100644 --- a/reader/processor/processor.go +++ b/reader/processor/processor.go @@ -85,7 +85,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us } } - entry.Content = rewrite.Rewriter(url, entry.Content, feed.RewriteRules) + rewrite.Rewriter(url, entry, feed.RewriteRules) // The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered. entry.Content = sanitizer.Sanitize(url, entry.Content) @@ -168,14 +168,14 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) return scraperErr } - content = rewrite.Rewriter(url, content, entry.Feed.RewriteRules) - content = sanitizer.Sanitize(url, content) - if content != "" { entry.Content = content entry.ReadingTime = calculateReadingTime(content, user) } + rewrite.Rewriter(url, entry, entry.Feed.RewriteRules) + entry.Content = sanitizer.Sanitize(url, entry.Content) + return nil } diff --git a/reader/rewrite/rewrite_functions.go b/reader/rewrite/rewrite_functions.go index 1d34a6ea..26a08b6e 100644 --- a/reader/rewrite/rewrite_functions.go +++ b/reader/rewrite/rewrite_functions.go @@ -367,3 +367,17 @@ func removeTables(entryContent string) string { output, _ := doc.Find("body").First().Html() return output } + +func removeClickbait(entryTitle string) string { + titleWords := []string{} + for _, word := range strings.Fields(entryTitle) { + runes := []rune(word) + if len(runes) > 1 { + // keep first rune as is to keep the first capital letter + titleWords = append(titleWords, string([]rune{runes[0]})+strings.ToLower(string(runes[1:]))) + } else { + titleWords = append(titleWords, word) + } + } + return strings.Join(titleWords, " ") +} diff --git a/reader/rewrite/rewriter.go b/reader/rewrite/rewriter.go index 961a47ec..9824fc3a 100644 --- a/reader/rewrite/rewriter.go +++ b/reader/rewrite/rewriter.go @@ -10,6 +10,7 @@ import ( "text/scanner" "miniflux.app/logger" + "miniflux.app/model" "miniflux.app/url" ) @@ -19,7 +20,7 @@ type rule struct { } // Rewriter modify item contents with a set of rewriting rules. -func Rewriter(entryURL, entryContent, customRewriteRules string) string { +func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) { rulesList := getPredefinedRewriteRules(entryURL) if customRewriteRules != "" { rulesList = customRewriteRules @@ -31,10 +32,8 @@ func Rewriter(entryURL, entryContent, customRewriteRules string) string { logger.Debug(`[Rewrite] Applying rules %v for %q`, rules, entryURL) for _, rule := range rules { - entryContent = applyRule(entryURL, entryContent, rule) + applyRule(entryURL, entry, rule) } - - return entryContent } func parseRules(rulesText string) (rules []rule) { @@ -60,61 +59,61 @@ func parseRules(rulesText string) (rules []rule) { } } -func applyRule(entryURL, entryContent string, rule rule) string { +func applyRule(entryURL string, entry *model.Entry, rule rule) { switch rule.name { case "add_image_title": - entryContent = addImageTitle(entryURL, entryContent) + entry.Content = addImageTitle(entryURL, entry.Content) case "add_mailto_subject": - entryContent = addMailtoSubject(entryURL, entryContent) + entry.Content = addMailtoSubject(entryURL, entry.Content) case "add_dynamic_image": - entryContent = addDynamicImage(entryURL, entryContent) + entry.Content = addDynamicImage(entryURL, entry.Content) case "add_youtube_video": - entryContent = addYoutubeVideo(entryURL, entryContent) + entry.Content = addYoutubeVideo(entryURL, entry.Content) case "add_invidious_video": - entryContent = addInvidiousVideo(entryURL, entryContent) + entry.Content = addInvidiousVideo(entryURL, entry.Content) case "add_youtube_video_using_invidious_player": - entryContent = addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent) + entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content) case "add_youtube_video_from_id": - entryContent = addYoutubeVideoFromId(entryContent) + entry.Content = addYoutubeVideoFromId(entry.Content) case "add_pdf_download_link": - entryContent = addPDFLink(entryURL, entryContent) + entry.Content = addPDFLink(entryURL, entry.Content) case "nl2br": - entryContent = replaceLineFeeds(entryContent) + entry.Content = replaceLineFeeds(entry.Content) case "convert_text_link", "convert_text_links": - entryContent = replaceTextLinks(entryContent) + entry.Content = replaceTextLinks(entry.Content) case "fix_medium_images": - entryContent = fixMediumImages(entryURL, entryContent) + entry.Content = fixMediumImages(entryURL, entry.Content) case "use_noscript_figure_images": - entryContent = useNoScriptImages(entryURL, entryContent) + entry.Content = useNoScriptImages(entryURL, entry.Content) case "replace": // Format: replace("search-term"|"replace-term") if len(rule.args) >= 2 { - entryContent = replaceCustom(entryContent, rule.args[0], rule.args[1]) + entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1]) } else { logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule) } case "remove": // Format: remove("#selector > .element, .another") if len(rule.args) >= 1 { - entryContent = removeCustom(entryContent, rule.args[0]) + entry.Content = removeCustom(entry.Content, rule.args[0]) } else { logger.Debug("[Rewrite] Cannot find selector for remove rule %s", rule) } case "add_castopod_episode": - entryContent = addCastopodEpisode(entryURL, entryContent) + entry.Content = addCastopodEpisode(entryURL, entry.Content) case "base64_decode": if len(rule.args) >= 1 { - entryContent = applyFuncOnTextContent(entryContent, rule.args[0], decodeBase64Content) + entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content) } else { - entryContent = applyFuncOnTextContent(entryContent, "body", decodeBase64Content) + entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content) } case "parse_markdown": - entryContent = parseMarkdown(entryContent) + entry.Content = parseMarkdown(entry.Content) case "remove_tables": - entryContent = removeTables(entryContent) + entry.Content = removeTables(entry.Content) + case "remove_clickbait": + entry.Title = removeClickbait(entry.Title) } - - return entryContent } func getPredefinedRewriteRules(entryURL string) string { diff --git a/reader/rewrite/rewriter_test.go b/reader/rewrite/rewriter_test.go index 7d3306b1..073809df 100644 --- a/reader/rewrite/rewriter_test.go +++ b/reader/rewrite/rewriter_test.go @@ -8,6 +8,8 @@ import ( "reflect" "strings" "testing" + + "miniflux.app/model" ) func TestParseRules(t *testing.T) { @@ -46,178 +48,301 @@ func TestReplaceTextLinks(t *testing.T) { } func TestRewriteWithNoMatchingRule(t *testing.T) { - output := Rewriter("https://example.org/article", `Some text.`, ``) - expected := `Some text.` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Some text.`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Some text.`, + } + Rewriter("https://example.org/article", testEntry, ``) - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithYoutubeLink(t *testing.T) { - output := Rewriter("https://www.youtube.com/watch?v=1234", "Video Description", ``) - expected := `
Video Description` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `
Video Description`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Video Description`, + } + Rewriter("https://www.youtube.com/watch?v=1234", testEntry, ``) - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithInexistingCustomRule(t *testing.T) { - output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`, `some rule`) - expected := `Video Description` - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Video Description`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Video Description`, + } + Rewriter("https://www.youtube.com/watch?v=1234", testEntry, `some rule`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithXkcdLink(t *testing.T) { - description := `Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.` - output := Rewriter("https://xkcd.com/1912/", description, ``) - expected := `
Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.

Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.

` - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `
Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.

Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.

`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.`, + } + Rewriter("https://xkcd.com/1912/", testEntry, ``) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) { - description := `<foo>` - output := Rewriter("https://xkcd.com/1912/", description, ``) - expected := `
<foo>

<foo>

` - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `
<foo>

<foo>

`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `<foo>`, + } + Rewriter("https://xkcd.com/1912/", testEntry, ``) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) { - description := `Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.` - output := Rewriter("https://xkcd.com/1912/", description, ``) - expected := description - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.`, + } + Rewriter("https://xkcd.com/1912/", testEntry, ``) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) { - description := "test" - output := Rewriter("https://xkcd.com/1912/", description, ``) - expected := description - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `test`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `test`, + } + Rewriter("https://xkcd.com/1912/", testEntry, ``) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithXkcdAndNoImage(t *testing.T) { - description := "test" - output := Rewriter("https://xkcd.com/1912/", description, ``) - expected := description + controlEntry := &model.Entry{ + Title: `A title`, + Content: `test`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `test`, + } + Rewriter("https://xkcd.com/1912/", testEntry, ``) - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteMailtoLink(t *testing.T) { - description := `contact` - output := Rewriter("https://www.qwantz.com/", description, ``) - expected := `contact [blah blah]` - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `contact [blah blah]`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `contact`, + } + Rewriter("https://www.qwantz.com/", testEntry, ``) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithPDFLink(t *testing.T) { - description := "test" - output := Rewriter("https://example.org/document.pdf", description, ``) - expected := `PDF
test` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `PDF
test`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `test`, + } + Rewriter("https://example.org/document.pdf", testEntry, ``) - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithNoLazyImage(t *testing.T) { - description := `Image` - output := Rewriter("https://example.org/article", description, "add_dynamic_image") - expected := description + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Image`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Image`, + } + Rewriter("https://example.org/article", testEntry, "add_dynamic_image") - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithLazyImage(t *testing.T) { - description := `Image` - output := Rewriter("https://example.org/article", description, "add_dynamic_image") - expected := `Image` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Image`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Image`, + } + Rewriter("https://example.org/article", testEntry, "add_dynamic_image") - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithLazyDivImage(t *testing.T) { - description := `
` - output := Rewriter("https://example.org/article", description, "add_dynamic_image") - expected := `Image` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Image`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `
`, + } + Rewriter("https://example.org/article", testEntry, "add_dynamic_image") - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) { - description := `Image` - output := Rewriter("https://example.org/article", description, "add_dynamic_image") - expected := `ImageFallback` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `ImageFallback`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Image`, + } + Rewriter("https://example.org/article", testEntry, "add_dynamic_image") - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithLazySrcset(t *testing.T) { - description := `Image` - output := Rewriter("https://example.org/article", description, "add_dynamic_image") - expected := `Image` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Image`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Image`, + } + Rewriter("https://example.org/article", testEntry, "add_dynamic_image") - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteWithImageAndLazySrcset(t *testing.T) { - description := `Image` - output := Rewriter("https://example.org/article", description, "add_dynamic_image") - expected := `Image` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Image`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Image`, + } + Rewriter("https://example.org/article", testEntry, "add_dynamic_image") - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestNewLineRewriteRule(t *testing.T) { - description := "A\nB\nC" - output := Rewriter("https://example.org/article", description, "nl2br") - expected := `A
B
C` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `A
B
C`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: "A\nB\nC", + } + Rewriter("https://example.org/article", testEntry, "nl2br") - if expected != output { - t.Errorf(`Not expected output: got %q instead of %q`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestConvertTextLinkRewriteRule(t *testing.T) { - description := "Test: http://example.org/a/b" - output := Rewriter("https://example.org/article", description, "convert_text_link") - expected := `Test: http://example.org/a/b` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Test: http://example.org/a/b`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Test: http://example.org/a/b`, + } + Rewriter("https://example.org/article", testEntry, "convert_text_link") - if expected != output { - t.Errorf(`Not expected output: got %q instead of %q`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestMediumImage(t *testing.T) { - content := ` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Image for post`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `
@@ -235,103 +360,174 @@ func TestMediumImage(t *testing.T) {
- ` - expected := `Image for post` - output := Rewriter("https://example.org/article", content, "fix_medium_images") - output = strings.TrimSpace(output) + `, + } + Rewriter("https://example.org/article", testEntry, "fix_medium_images") + testEntry.Content = strings.TrimSpace(testEntry.Content) - if expected != output { - t.Errorf(`Not expected output: %s`, output) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) { - content := `
The beautiful MDN logo.
MDN Logo
` - expected := `
The beautiful MDN logo.
MDN Logo
` - output := Rewriter("https://example.org/article", content, "use_noscript_figure_images") - output = strings.TrimSpace(output) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `
The beautiful MDN logo.
MDN Logo
`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `
The beautiful MDN logo.
MDN Logo
`, + } + Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images") + testEntry.Content = strings.TrimSpace(testEntry.Content) - if expected != output { - t.Errorf(`Not expected output: %s`, output) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) { - content := `
The beautiful MDN logo.
MDN Logo
` - expected := `
MDN Logo
` - output := Rewriter("https://example.org/article", content, "use_noscript_figure_images") - output = strings.TrimSpace(output) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `
MDN Logo
`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `
The beautiful MDN logo.
MDN Logo
`, + } + Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images") + testEntry.Content = strings.TrimSpace(testEntry.Content) - if expected != output { - t.Errorf(`Not expected output: %s`, output) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteReplaceCustom(t *testing.T) { - content := `` - expected := `` - output := Rewriter("https://example.org/article", content, `replace("article/(.*).svg"|"article/$1.png")`) + controlEntry := &model.Entry{ + Title: `A title`, + Content: ``, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: ``, + } + Rewriter("https://example.org/article", testEntry, `replace("article/(.*).svg"|"article/$1.png")`) - if expected != output { - t.Errorf(`Not expected output: %s`, output) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteRemoveCustom(t *testing.T) { - content := `
Lorem Ipsum I dont want to see thisSuper important info
` - expected := `
Lorem Ipsum Super important info
` - output := Rewriter("https://example.org/article", content, `remove(".spam, .ads:not(.keep)")`) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `
Lorem Ipsum Super important info
`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `
Lorem Ipsum I dont want to see thisSuper important info
`, + } + Rewriter("https://example.org/article", testEntry, `remove(".spam, .ads:not(.keep)")`) - if expected != output { - t.Errorf(`Not expected output: %s`, output) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteAddCastopodEpisode(t *testing.T) { - output := Rewriter("https://podcast.demo/@demo/episodes/test", "Episode Description", `add_castopod_episode`) - expected := `
Episode Description` + controlEntry := &model.Entry{ + Title: `A title`, + Content: `
Episode Description`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Episode Description`, + } + Rewriter("https://podcast.demo/@demo/episodes/test", testEntry, `add_castopod_episode`) - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteBase64Decode(t *testing.T) { - content := `VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=` - expected := `This is some base64 encoded content` - output := Rewriter("https://example.org/article", content, `base64_decode`) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `This is some base64 encoded content`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=`, + } + Rewriter("https://example.org/article", testEntry, `base64_decode`) - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteBase64DecodeInHTML(t *testing.T) { - content := `
Lorem Ipsum not valid base64VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=
` - expected := `
Lorem Ipsum not valid base64This is some base64 encoded content
` - output := Rewriter("https://example.org/article", content, `base64_decode`) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `
Lorem Ipsum not valid base64This is some base64 encoded content
`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `
Lorem Ipsum not valid base64VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=
`, + } + Rewriter("https://example.org/article", testEntry, `base64_decode`) - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteBase64DecodeArgs(t *testing.T) { - content := `
Lorem IpsumVGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=
` - expected := `
Lorem IpsumThis is some base64 encoded content
` - output := Rewriter("https://example.org/article", content, `base64_decode(".base64")`) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `
Lorem IpsumThis is some base64 encoded content
`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `
Lorem IpsumVGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=
`, + } + Rewriter("https://example.org/article", testEntry, `base64_decode(".base64")`) - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } func TestRewriteRemoveTables(t *testing.T) { - content := `

Test

Hello World!

Test

` - expected := `

Test

Hello World!

Test

` - output := Rewriter("https://example.org/article", content, `remove_tables`) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `

Test

Hello World!

Test

`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `

Test

Hello World!

Test

`, + } + Rewriter("https://example.org/article", testEntry, `remove_tables`) - if expected != output { - t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestRemoveClickbait(t *testing.T) { + controlEntry := &model.Entry{ + Title: `This Is Amazing`, + Content: `Some description`, + } + testEntry := &model.Entry{ + Title: `THIS IS AMAZING`, + Content: `Some description`, + } + Rewriter("https://example.org/article", testEntry, `remove_clickbait`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } }