2023-06-19 17:42:47 -04:00
|
|
|
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0
|
2017-11-20 00:10:04 -05:00
|
|
|
|
2023-08-10 22:46:45 -04:00
|
|
|
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
|
2017-11-20 00:10:04 -05:00
|
|
|
|
|
|
|
import (
|
2023-09-24 19:32:09 -04:00
|
|
|
"log/slog"
|
2021-09-01 17:42:23 -04:00
|
|
|
"strconv"
|
2021-01-27 08:09:50 -05:00
|
|
|
"strings"
|
2021-09-01 17:42:23 -04:00
|
|
|
"text/scanner"
|
2017-11-20 00:10:04 -05:00
|
|
|
|
2023-08-10 22:46:45 -04:00
|
|
|
"miniflux.app/v2/internal/model"
|
2023-08-13 22:09:01 -04:00
|
|
|
"miniflux.app/v2/internal/urllib"
|
2017-11-20 00:10:04 -05:00
|
|
|
)
|
|
|
|
|
2021-09-01 17:42:23 -04:00
|
|
|
// rule is one parsed rewrite instruction: the identifier that selects the
// rewrite to run, plus the string arguments that followed it in the rules text.
type rule struct {
	// name is the rule identifier, for example "replace" or "nl2br".
	name string
	// args holds the rule's string arguments in the order they were written.
	args []string
}
|
2020-11-25 17:51:54 -05:00
|
|
|
|
2017-12-12 01:16:32 -05:00
|
|
|
// Rewriter modify item contents with a set of rewriting rules.
|
2023-04-08 05:02:36 -04:00
|
|
|
func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
|
2017-12-12 01:16:32 -05:00
|
|
|
rulesList := getPredefinedRewriteRules(entryURL)
|
|
|
|
if customRewriteRules != "" {
|
|
|
|
rulesList = customRewriteRules
|
|
|
|
}
|
2017-11-20 00:10:04 -05:00
|
|
|
|
2021-09-01 17:42:23 -04:00
|
|
|
rules := parseRules(rulesList)
|
|
|
|
rules = append(rules, rule{name: "add_pdf_download_link"})
|
2017-12-14 00:30:40 -05:00
|
|
|
|
2023-09-24 19:32:09 -04:00
|
|
|
slog.Debug("Rewrite rules applied",
|
|
|
|
slog.Any("rules", rules),
|
|
|
|
slog.String("entry_url", entryURL),
|
|
|
|
)
|
2018-12-02 23:51:06 -05:00
|
|
|
|
2017-12-12 01:16:32 -05:00
|
|
|
for _, rule := range rules {
|
2023-04-08 05:02:36 -04:00
|
|
|
applyRule(entryURL, entry, rule)
|
2021-09-01 17:42:23 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func parseRules(rulesText string) (rules []rule) {
|
|
|
|
scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
|
|
|
|
scan.Init(strings.NewReader(rulesText))
|
|
|
|
|
|
|
|
for {
|
|
|
|
switch scan.Scan() {
|
|
|
|
case scanner.Ident:
|
|
|
|
rules = append(rules, rule{name: scan.TokenText()})
|
|
|
|
|
|
|
|
case scanner.String:
|
|
|
|
if l := len(rules) - 1; l >= 0 {
|
|
|
|
text := scan.TokenText()
|
|
|
|
text, _ = strconv.Unquote(text)
|
|
|
|
|
|
|
|
rules[l].args = append(rules[l].args, text)
|
2020-11-25 17:51:54 -05:00
|
|
|
}
|
2021-09-01 17:42:23 -04:00
|
|
|
|
|
|
|
case scanner.EOF:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-08 05:02:36 -04:00
|
|
|
func applyRule(entryURL string, entry *model.Entry, rule rule) {
|
2021-09-01 17:42:23 -04:00
|
|
|
switch rule.name {
|
|
|
|
case "add_image_title":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = addImageTitle(entryURL, entry.Content)
|
2021-09-01 17:42:23 -04:00
|
|
|
case "add_mailto_subject":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = addMailtoSubject(entryURL, entry.Content)
|
2021-09-01 17:42:23 -04:00
|
|
|
case "add_dynamic_image":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = addDynamicImage(entryURL, entry.Content)
|
2021-09-01 17:42:23 -04:00
|
|
|
case "add_youtube_video":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = addYoutubeVideo(entryURL, entry.Content)
|
2021-09-01 17:42:23 -04:00
|
|
|
case "add_invidious_video":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = addInvidiousVideo(entryURL, entry.Content)
|
2021-09-01 17:42:23 -04:00
|
|
|
case "add_youtube_video_using_invidious_player":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content)
|
2022-01-03 10:47:10 -05:00
|
|
|
case "add_youtube_video_from_id":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = addYoutubeVideoFromId(entry.Content)
|
2021-09-01 17:42:23 -04:00
|
|
|
case "add_pdf_download_link":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = addPDFLink(entryURL, entry.Content)
|
2021-09-01 17:42:23 -04:00
|
|
|
case "nl2br":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = replaceLineFeeds(entry.Content)
|
2021-09-01 17:42:23 -04:00
|
|
|
case "convert_text_link", "convert_text_links":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = replaceTextLinks(entry.Content)
|
2021-09-01 17:42:23 -04:00
|
|
|
case "fix_medium_images":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = fixMediumImages(entryURL, entry.Content)
|
2021-09-01 17:42:23 -04:00
|
|
|
case "use_noscript_figure_images":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = useNoScriptImages(entryURL, entry.Content)
|
2021-09-01 17:42:23 -04:00
|
|
|
case "replace":
|
|
|
|
// Format: replace("search-term"|"replace-term")
|
|
|
|
if len(rule.args) >= 2 {
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1])
|
2021-09-01 17:42:23 -04:00
|
|
|
} else {
|
2023-09-24 19:32:09 -04:00
|
|
|
slog.Warn("Cannot find search and replace terms for replace rule",
|
|
|
|
slog.Any("rule", rule),
|
|
|
|
slog.String("entry_url", entryURL),
|
|
|
|
)
|
2021-09-01 17:42:23 -04:00
|
|
|
}
|
2023-09-06 23:09:54 -04:00
|
|
|
case "replace_title":
|
|
|
|
// Format: replace_title("search-term"|"replace-term")
|
|
|
|
if len(rule.args) >= 2 {
|
|
|
|
entry.Title = replaceCustom(entry.Title, rule.args[0], rule.args[1])
|
|
|
|
} else {
|
2023-09-24 19:32:09 -04:00
|
|
|
slog.Warn("Cannot find search and replace terms for replace_title rule",
|
|
|
|
slog.Any("rule", rule),
|
|
|
|
slog.String("entry_url", entryURL),
|
|
|
|
)
|
2023-09-06 23:09:54 -04:00
|
|
|
}
|
2021-09-01 17:42:23 -04:00
|
|
|
case "remove":
|
|
|
|
// Format: remove("#selector > .element, .another")
|
|
|
|
if len(rule.args) >= 1 {
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = removeCustom(entry.Content, rule.args[0])
|
2021-09-01 17:42:23 -04:00
|
|
|
} else {
|
2023-09-24 19:32:09 -04:00
|
|
|
slog.Warn("Cannot find selector for remove rule",
|
|
|
|
slog.Any("rule", rule),
|
|
|
|
slog.String("entry_url", entryURL),
|
|
|
|
)
|
2017-11-20 00:10:04 -05:00
|
|
|
}
|
2022-01-30 03:11:43 -05:00
|
|
|
case "add_castopod_episode":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = addCastopodEpisode(entryURL, entry.Content)
|
2022-05-25 23:44:04 -04:00
|
|
|
case "base64_decode":
|
|
|
|
if len(rule.args) >= 1 {
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content)
|
2022-05-25 23:44:04 -04:00
|
|
|
} else {
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content)
|
2022-05-25 23:44:04 -04:00
|
|
|
}
|
2023-09-23 16:54:48 -04:00
|
|
|
case "add_hn_links_using_hack":
|
|
|
|
entry.Content = addHackerNewsLinksUsing(entry.Content, "hack")
|
|
|
|
case "add_hn_links_using_opener":
|
|
|
|
entry.Content = addHackerNewsLinksUsing(entry.Content, "opener")
|
2022-07-27 10:55:28 -04:00
|
|
|
case "parse_markdown":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = parseMarkdown(entry.Content)
|
2023-03-31 14:23:31 -04:00
|
|
|
case "remove_tables":
|
2023-04-08 05:02:36 -04:00
|
|
|
entry.Content = removeTables(entry.Content)
|
|
|
|
case "remove_clickbait":
|
|
|
|
entry.Title = removeClickbait(entry.Title)
|
2017-12-12 01:16:32 -05:00
|
|
|
}
|
2017-11-20 00:10:04 -05:00
|
|
|
}
|
|
|
|
|
2017-12-12 01:16:32 -05:00
|
|
|
func getPredefinedRewriteRules(entryURL string) string {
|
2023-08-13 22:09:01 -04:00
|
|
|
urlDomain := urllib.Domain(entryURL)
|
2017-12-12 01:16:32 -05:00
|
|
|
for domain, rules := range predefinedRules {
|
|
|
|
if strings.Contains(urlDomain, domain) {
|
|
|
|
return rules
|
|
|
|
}
|
2017-11-20 00:10:04 -05:00
|
|
|
}
|
|
|
|
|
2017-12-12 01:16:32 -05:00
|
|
|
return ""
|
2017-11-20 00:10:04 -05:00
|
|
|
}
|