Minor simplifications of the rewriter
- Online some one-line functions - Transform a free-standing function into a method - Massively simplify `removeClickbait` - Use a proper constant instead of a magic number in `applyFuncOnTextContent`
This commit is contained in:
parent
02a074ed26
commit
c29ca0e313
2 changed files with 51 additions and 68 deletions
|
@ -14,6 +14,8 @@ import (
|
|||
|
||||
"miniflux.app/v2/internal/config"
|
||||
|
||||
nethtml "golang.org/x/net/html"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/yuin/goldmark"
|
||||
goldmarkhtml "github.com/yuin/goldmark/renderer/html"
|
||||
|
@ -301,10 +303,6 @@ func replaceTextLinks(input string) string {
|
|||
return textLinkRegex.ReplaceAllString(input, `<a href="${1}">${1}</a>`)
|
||||
}
|
||||
|
||||
func replaceLineFeeds(input string) string {
|
||||
return strings.ReplaceAll(input, "\n", "<br>")
|
||||
}
|
||||
|
||||
func replaceCustom(entryContent string, searchTerm string, replaceTerm string) string {
|
||||
re, err := regexp.Compile(searchTerm)
|
||||
if err == nil {
|
||||
|
@ -334,7 +332,7 @@ func addCastopodEpisode(entryURL, entryContent string) string {
|
|||
func applyFuncOnTextContent(entryContent string, selector string, repl func(string) string) string {
|
||||
var treatChildren func(i int, s *goquery.Selection)
|
||||
treatChildren = func(i int, s *goquery.Selection) {
|
||||
if s.Nodes[0].Type == 1 {
|
||||
if s.Nodes[0].Type == nethtml.TextNode {
|
||||
s.ReplaceWithHtml(repl(s.Nodes[0].Data))
|
||||
} else {
|
||||
s.Contents().Each(treatChildren)
|
||||
|
@ -457,17 +455,3 @@ func removeTables(entryContent string) string {
|
|||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
func removeClickbait(entryTitle string) string {
|
||||
titleWords := []string{}
|
||||
for _, word := range strings.Fields(entryTitle) {
|
||||
runes := []rune(word)
|
||||
if len(runes) > 1 {
|
||||
// keep first rune as is to keep the first capital letter
|
||||
titleWords = append(titleWords, string([]rune{runes[0]})+strings.ToLower(string(runes[1:])))
|
||||
} else {
|
||||
titleWords = append(titleWords, word)
|
||||
}
|
||||
}
|
||||
return strings.Join(titleWords, " ")
|
||||
}
|
||||
|
|
|
@ -11,6 +11,9 @@ import (
|
|||
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/urllib"
|
||||
|
||||
"golang.org/x/text/cases"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
type rule struct {
|
||||
|
@ -18,50 +21,7 @@ type rule struct {
|
|||
args []string
|
||||
}
|
||||
|
||||
// Rewriter modify item contents with a set of rewriting rules.
|
||||
func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
|
||||
rulesList := getPredefinedRewriteRules(entryURL)
|
||||
if customRewriteRules != "" {
|
||||
rulesList = customRewriteRules
|
||||
}
|
||||
|
||||
rules := parseRules(rulesList)
|
||||
rules = append(rules, rule{name: "add_pdf_download_link"})
|
||||
|
||||
slog.Debug("Rewrite rules applied",
|
||||
slog.Any("rules", rules),
|
||||
slog.String("entry_url", entryURL),
|
||||
)
|
||||
|
||||
for _, rule := range rules {
|
||||
applyRule(entryURL, entry, rule)
|
||||
}
|
||||
}
|
||||
|
||||
func parseRules(rulesText string) (rules []rule) {
|
||||
scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
|
||||
scan.Init(strings.NewReader(rulesText))
|
||||
|
||||
for {
|
||||
switch scan.Scan() {
|
||||
case scanner.Ident:
|
||||
rules = append(rules, rule{name: scan.TokenText()})
|
||||
|
||||
case scanner.String:
|
||||
if l := len(rules) - 1; l >= 0 {
|
||||
text := scan.TokenText()
|
||||
text, _ = strconv.Unquote(text)
|
||||
|
||||
rules[l].args = append(rules[l].args, text)
|
||||
}
|
||||
|
||||
case scanner.EOF:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func applyRule(entryURL string, entry *model.Entry, rule rule) {
|
||||
func (rule rule) applyRule(entryURL string, entry *model.Entry) {
|
||||
switch rule.name {
|
||||
case "add_image_title":
|
||||
entry.Content = addImageTitle(entryURL, entry.Content)
|
||||
|
@ -82,7 +42,7 @@ func applyRule(entryURL string, entry *model.Entry, rule rule) {
|
|||
case "add_pdf_download_link":
|
||||
entry.Content = addPDFLink(entryURL, entry.Content)
|
||||
case "nl2br":
|
||||
entry.Content = replaceLineFeeds(entry.Content)
|
||||
entry.Content = strings.ReplaceAll(entry.Content, "\n", "<br>")
|
||||
case "convert_text_link", "convert_text_links":
|
||||
entry.Content = replaceTextLinks(entry.Content)
|
||||
case "fix_medium_images":
|
||||
|
@ -122,11 +82,11 @@ func applyRule(entryURL string, entry *model.Entry, rule rule) {
|
|||
case "add_castopod_episode":
|
||||
entry.Content = addCastopodEpisode(entryURL, entry.Content)
|
||||
case "base64_decode":
|
||||
selector := "body"
|
||||
if len(rule.args) >= 1 {
|
||||
entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content)
|
||||
} else {
|
||||
entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content)
|
||||
selector = rule.args[0]
|
||||
}
|
||||
entry.Content = applyFuncOnTextContent(entry.Content, selector, decodeBase64Content)
|
||||
case "add_hn_links_using_hack":
|
||||
entry.Content = addHackerNewsLinksUsing(entry.Content, "hack")
|
||||
case "add_hn_links_using_opener":
|
||||
|
@ -136,7 +96,46 @@ func applyRule(entryURL string, entry *model.Entry, rule rule) {
|
|||
case "remove_tables":
|
||||
entry.Content = removeTables(entry.Content)
|
||||
case "remove_clickbait":
|
||||
entry.Title = removeClickbait(entry.Title)
|
||||
entry.Title = cases.Title(language.English).String(strings.ToLower(entry.Title))
|
||||
}
|
||||
}
|
||||
|
||||
// Rewriter modify item contents with a set of rewriting rules.
|
||||
func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
|
||||
rulesList := getPredefinedRewriteRules(entryURL)
|
||||
if customRewriteRules != "" {
|
||||
rulesList = customRewriteRules
|
||||
}
|
||||
|
||||
rules := parseRules(rulesList)
|
||||
rules = append(rules, rule{name: "add_pdf_download_link"})
|
||||
|
||||
slog.Debug("Rewrite rules applied",
|
||||
slog.Any("rules", rules),
|
||||
slog.String("entry_url", entryURL),
|
||||
)
|
||||
|
||||
for _, rule := range rules {
|
||||
rule.applyRule(entryURL, entry)
|
||||
}
|
||||
}
|
||||
|
||||
func parseRules(rulesText string) (rules []rule) {
|
||||
scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
|
||||
scan.Init(strings.NewReader(rulesText))
|
||||
|
||||
for {
|
||||
switch scan.Scan() {
|
||||
case scanner.Ident:
|
||||
rules = append(rules, rule{name: scan.TokenText()})
|
||||
case scanner.String:
|
||||
if l := len(rules) - 1; l >= 0 {
|
||||
text, _ := strconv.Unquote(scan.TokenText())
|
||||
rules[l].args = append(rules[l].args, text)
|
||||
}
|
||||
case scanner.EOF:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue