From c2d2f314384c92e0b8c836400572415433620a6e Mon Sep 17 00:00:00 2001 From: jvoisin Date: Mon, 26 Feb 2024 17:37:49 +0100 Subject: [PATCH] Improve a bit internal/reader/scraper/scraper.go - make findContentUsingCustomRules more idiomatic, since in Go a function returning an error might return garbage in its other return values. Moreover, ignoring errors is bad practice. - getPredefinedScraperRules now runs in constant time, instead of iterating over a list of around 50 items. --- internal/reader/scraper/scraper.go | 15 ++++++--------- internal/reader/scraper/scraper_test.go | 4 ++++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/internal/reader/scraper/scraper.go b/internal/reader/scraper/scraper.go index 4aabff48..0a0832d4 100644 --- a/internal/reader/scraper/scraper.go +++ b/internal/reader/scraper/scraper.go @@ -78,10 +78,9 @@ func findContentUsingCustomRules(page io.Reader, rules string) (string, error) { contents := "" document.Find(rules).Each(func(i int, s *goquery.Selection) { - var content string - - content, _ = goquery.OuterHtml(s) - contents += content + if content, err := goquery.OuterHtml(s); err == nil { + contents += content + } }) return contents, nil @@ -89,13 +88,11 @@ func findContentUsingCustomRules(page io.Reader, rules string) (string, error) { func getPredefinedScraperRules(websiteURL string) string { urlDomain := urllib.Domain(websiteURL) + urlDomain = strings.TrimPrefix(urlDomain, "www.") - for domain, rules := range predefinedRules { - if strings.Contains(urlDomain, domain) { - return rules - } + if rules, ok := predefinedRules[urlDomain]; ok { + return rules } - return "" } diff --git a/internal/reader/scraper/scraper_test.go b/internal/reader/scraper/scraper_test.go index 9ad1e080..bf786129 100644 --- a/internal/reader/scraper/scraper_test.go +++ b/internal/reader/scraper/scraper_test.go @@ -19,6 +19,10 @@ func TestGetPredefinedRules(t *testing.T) { t.Error("Unable to find rule for linux.com") } + if 
getPredefinedScraperRules("https://linux.com/") == "" { + t.Error("Unable to find rule for linux.com") + } + if getPredefinedScraperRules("https://example.org/") != "" { t.Error("A rule not defined should not return anything") }