From a1537f4b0d2b0c960f6d3ea02bed11d47fc8d338 Mon Sep 17 00:00:00 2001
From: Tianfeng Wang
Date: Thu, 26 Oct 2023 03:38:08 +0100
Subject: [PATCH] Filter feed entries based on url or title

---
 internal/reader/processor/processor.go      | 20 ++++++++++++++++----
 internal/reader/processor/processor_test.go |  4 ++++
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go
index 6b9af305..2463e525 100644
--- a/internal/reader/processor/processor.go
+++ b/internal/reader/processor/processor.go
@@ -115,8 +115,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
 
 func isBlockedEntry(feed *model.Feed, entry *model.Entry) bool {
 	if feed.BlocklistRules != "" {
-		match, _ := regexp.MatchString(feed.BlocklistRules, entry.Title)
-		if match {
+		if matchField(feed.BlocklistRules, entry.URL) || matchField(feed.BlocklistRules, entry.Title) {
 			slog.Debug("Blocking entry based on rule",
 				slog.Int64("entry_id", entry.ID),
 				slog.String("entry_url", entry.URL),
@@ -127,13 +126,13 @@ func isBlockedEntry(feed *model.Feed, entry *model.Entry) bool {
 			return true
 		}
 	}
+
 	return false
 }
 
 func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
 	if feed.KeeplistRules != "" {
-		match, _ := regexp.MatchString(feed.KeeplistRules, entry.Title)
-		if match {
+		if matchField(feed.KeeplistRules, entry.URL) || matchField(feed.KeeplistRules, entry.Title) {
 			slog.Debug("Allow entry based on rule",
 				slog.Int64("entry_id", entry.ID),
 				slog.String("entry_url", entry.URL),
@@ -148,6 +147,19 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
 	return true
 }
 
+// matchField reports whether value matches the regexp pattern; a regexp error is logged at debug level and treated as no match.
+func matchField(pattern, value string) bool {
+	match, err := regexp.MatchString(pattern, value)
+	if err != nil {
+		slog.Debug("Failed on regexp match",
+			slog.String("pattern", pattern),
+			slog.String("value", value),
+			slog.Any("error", err),
+		)
+	}
+	return match
+}
+
 // ProcessEntryWebPage downloads the entry web page and apply rewrite rules.
 func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error {
 	startTime := time.Now()
diff --git a/internal/reader/processor/processor_test.go b/internal/reader/processor/processor_test.go
index a719c81f..bbc58c23 100644
--- a/internal/reader/processor/processor_test.go
+++ b/internal/reader/processor/processor_test.go
@@ -16,6 +16,8 @@ func TestBlockingEntries(t *testing.T) {
 		entry    *model.Entry
 		expected bool
 	}{
+		{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{URL: "https://example.com"}, true},
+		{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{URL: "https://different.com"}, false},
 		{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Some Example"}, true},
 		{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Something different"}, false},
 		{&model.Feed{ID: 1}, &model.Entry{Title: "No rule defined"}, false},
@@ -35,6 +37,8 @@ func TestAllowEntries(t *testing.T) {
 		entry    *model.Entry
 		expected bool
 	}{
+		{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{URL: "https://example.com"}, true},
+		{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{URL: "https://different.com"}, false},
 		{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Some Example"}, true},
 		{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Something different"}, false},
 		{&model.Feed{ID: 1}, &model.Entry{Title: "No rule defined"}, true},