1
0
Fork 0

Simplify feed fetcher

- Add browser package to handle HTTP errors
- Reduce code duplication
This commit is contained in:
Frédéric Guillot 2018-10-14 21:43:48 -07:00
parent 5870f04260
commit 778346b0b0
8 changed files with 275 additions and 186 deletions

View file

@ -7,9 +7,11 @@ package model // import "miniflux.app/model"
import ( import (
"fmt" "fmt"
"time" "time"
"miniflux.app/http/client"
) )
// Feed represents a feed in the database. // Feed represents a feed in the application.
type Feed struct { type Feed struct {
ID int64 `json:"id"` ID int64 `json:"id"`
UserID int64 `json:"user_id"` UserID int64 `json:"user_id"`
@ -43,5 +45,46 @@ func (f *Feed) String() string {
) )
} }
// WithClientResponse updates feed attributes from an HTTP request.
func (f *Feed) WithClientResponse(response *client.Response) {
f.EtagHeader = response.ETag
f.LastModifiedHeader = response.LastModified
f.FeedURL = response.EffectiveURL
}
// WithCategoryID initializes the category attribute of the feed.
func (f *Feed) WithCategoryID(categoryID int64) {
f.Category = &Category{ID: categoryID}
}
// WithBrowsingParameters defines browsing parameters.
func (f *Feed) WithBrowsingParameters(crawler bool, userAgent, username, password string) {
f.Crawler = crawler
f.UserAgent = userAgent
f.Username = username
f.Password = password
}
// WithError adds a new error message and increment the error counter.
func (f *Feed) WithError(message string) {
f.ParsingErrorCount++
f.ParsingErrorMsg = message
}
// ResetErrorCounter removes all previous errors.
func (f *Feed) ResetErrorCounter() {
f.ParsingErrorCount = 0
f.ParsingErrorMsg = ""
}
// CheckedNow set attribute values when the feed is refreshed.
func (f *Feed) CheckedNow() {
f.CheckedAt = time.Now()
if f.SiteURL == "" {
f.SiteURL = f.FeedURL
}
}
// Feeds is a list of feed // Feeds is a list of feed
type Feeds []*Feed type Feeds []*Feed

101
model/feed_test.go Normal file
View file

@ -0,0 +1,101 @@
// Copyright 2018 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
package model // import "miniflux.app/reader/model"
import (
"testing"
"miniflux.app/http/client"
)
func TestFeedWithResponse(t *testing.T) {
response := &client.Response{ETag: "Some etag", LastModified: "Some date", EffectiveURL: "Some URL"}
feed := &Feed{}
feed.WithClientResponse(response)
if feed.EtagHeader != "Some etag" {
t.Fatal(`The ETag header should be set`)
}
if feed.LastModifiedHeader != "Some date" {
t.Fatal(`The LastModified header should be set`)
}
if feed.FeedURL != "Some URL" {
t.Fatal(`The Feed URL should be set`)
}
}
func TestFeedCategorySetter(t *testing.T) {
feed := &Feed{}
feed.WithCategoryID(int64(123))
if feed.Category == nil {
t.Fatal(`The category field should not be null`)
}
if feed.Category.ID != int64(123) {
t.Error(`The category ID must be set`)
}
}
func TestFeedBrowsingParams(t *testing.T) {
feed := &Feed{}
feed.WithBrowsingParameters(true, "Custom User Agent", "Username", "Secret")
if !feed.Crawler {
t.Error(`The crawler must be activated`)
}
if feed.UserAgent != "Custom User Agent" {
t.Error(`The user agent must be set`)
}
if feed.Username != "Username" {
t.Error(`The username must be set`)
}
if feed.Password != "Secret" {
t.Error(`The password must be set`)
}
}
func TestFeedErrorCounter(t *testing.T) {
feed := &Feed{}
feed.WithError("Some Error")
if feed.ParsingErrorMsg != "Some Error" {
t.Error(`The error message must be set`)
}
if feed.ParsingErrorCount != 1 {
t.Error(`The error counter must be set to 1`)
}
feed.ResetErrorCounter()
if feed.ParsingErrorMsg != "" {
t.Error(`The error message must be removed`)
}
if feed.ParsingErrorCount != 0 {
t.Error(`The error counter must be set to 0`)
}
}
func TestFeedCheckedNow(t *testing.T) {
feed := &Feed{}
feed.FeedURL = "https://example.org/feed"
feed.CheckedNow()
if feed.SiteURL != feed.FeedURL {
t.Error(`The site URL must not be empty`)
}
if feed.CheckedAt.IsZero() {
t.Error(`The checked date must be set`)
}
}

55
reader/browser/browser.go Normal file
View file

@ -0,0 +1,55 @@
// Copyright 2018 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
package browser // import "miniflux.app/reader/browser"
import (
"miniflux.app/errors"
"miniflux.app/http/client"
)
var (
errRequestFailed = "Unable to open this link: %v"
errServerFailure = "Unable to fetch this resource (Status Code = %d)"
errEncoding = "Unable to normalize encoding: %q"
errEmptyFeed = "This feed is empty"
errResourceNotFound = "Resource not found (404), this feed doesn't exists anymore, check the feed URL"
errNotAuthorized = "You are not authorized to access this resource (invalid username/password)"
)
// Exec executes a HTTP request and handles errors.
func Exec(request *client.Client) (*client.Response, *errors.LocalizedError) {
response, err := request.Get()
if err != nil {
if e, ok := err.(*errors.LocalizedError); ok {
return nil, e
}
return nil, errors.NewLocalizedError(errRequestFailed, err)
}
if response.IsNotFound() {
return nil, errors.NewLocalizedError(errResourceNotFound)
}
if response.IsNotAuthorized() {
return nil, errors.NewLocalizedError(errNotAuthorized)
}
if response.HasServerFailure() {
return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode)
}
if response.StatusCode != 304 {
// Content-Length = -1 when no Content-Length header is sent.
if response.ContentLength == 0 {
return nil, errors.NewLocalizedError(errEmptyFeed)
}
if err := response.EnsureUnicodeBody(); err != nil {
return nil, errors.NewLocalizedError(errEncoding, err)
}
}
return response, nil
}

View file

@ -13,6 +13,7 @@ import (
"miniflux.app/locale" "miniflux.app/locale"
"miniflux.app/logger" "miniflux.app/logger"
"miniflux.app/model" "miniflux.app/model"
"miniflux.app/reader/browser"
"miniflux.app/reader/icon" "miniflux.app/reader/icon"
"miniflux.app/reader/parser" "miniflux.app/reader/parser"
"miniflux.app/reader/processor" "miniflux.app/reader/processor"
@ -21,14 +22,9 @@ import (
) )
var ( var (
errRequestFailed = "Unable to execute request: %v"
errServerFailure = "Unable to fetch feed (Status Code = %d)"
errDuplicate = "This feed already exists (%s)" errDuplicate = "This feed already exists (%s)"
errNotFound = "Feed %d not found" errNotFound = "Feed %d not found"
errEncoding = "Unable to normalize encoding: %q"
errCategoryNotFound = "Category not found for this user" errCategoryNotFound = "Category not found for this user"
errEmptyFeed = "This feed is empty"
errResourceNotFound = "Resource not found (404), this feed doesn't exists anymore, check the feed URL"
) )
// Handler contains all the logic to create and refresh feeds. // Handler contains all the logic to create and refresh feeds.
@ -44,155 +40,79 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
return nil, errors.NewLocalizedError(errCategoryNotFound) return nil, errors.NewLocalizedError(errCategoryNotFound)
} }
clt := client.New(url) request := client.New(url)
clt.WithCredentials(username, password) request.WithCredentials(username, password)
clt.WithUserAgent(userAgent) request.WithUserAgent(userAgent)
response, err := clt.Get() response, requestErr := browser.Exec(request)
if err != nil { if requestErr != nil {
if _, ok := err.(*errors.LocalizedError); ok { return nil, requestErr
return nil, err
}
return nil, errors.NewLocalizedError(errRequestFailed, err)
}
if response.HasServerFailure() {
return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode)
}
// Content-Length = -1 when no Content-Length header is sent
if response.ContentLength == 0 {
return nil, errors.NewLocalizedError(errEmptyFeed)
} }
if h.store.FeedURLExists(userID, response.EffectiveURL) { if h.store.FeedURLExists(userID, response.EffectiveURL) {
return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL) return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
} }
if err := response.EnsureUnicodeBody(); err != nil { subscription, parseErr := parser.ParseFeed(response.String())
return nil, errors.NewLocalizedError(errEncoding, err) if parseErr != nil {
return nil, parseErr
} }
subscription, feedErr := parser.ParseFeed(response.String()) subscription.UserID = userID
if feedErr != nil { subscription.WithCategoryID(categoryID)
return nil, feedErr subscription.WithBrowsingParameters(crawler, userAgent, username, password)
} subscription.WithClientResponse(response)
subscription.CheckedNow()
feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription) feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription)
feedProcessor.WithCrawler(crawler) feedProcessor.WithCrawler(crawler)
feedProcessor.Process() feedProcessor.Process()
subscription.Category = &model.Category{ID: categoryID} if storeErr := h.store.CreateFeed(subscription); storeErr != nil {
subscription.EtagHeader = response.ETag return nil, storeErr
subscription.LastModifiedHeader = response.LastModified
subscription.FeedURL = response.EffectiveURL
subscription.UserID = userID
subscription.Crawler = crawler
subscription.UserAgent = userAgent
subscription.Username = username
subscription.Password = password
if subscription.SiteURL == "" {
subscription.SiteURL = subscription.FeedURL
}
err = h.store.CreateFeed(subscription)
if err != nil {
return nil, err
} }
logger.Debug("[Handler:CreateFeed] Feed saved with ID: %d", subscription.ID) logger.Debug("[Handler:CreateFeed] Feed saved with ID: %d", subscription.ID)
icon, err := icon.FindIcon(subscription.SiteURL) checkFeedIcon(h.store, subscription.ID, subscription.SiteURL)
if err != nil {
logger.Error("[Handler:CreateFeed] %v", err)
} else if icon == nil {
logger.Info("No icon found for feedID=%d", subscription.ID)
} else {
h.store.CreateFeedIcon(subscription, icon)
}
return subscription, nil return subscription, nil
} }
// RefreshFeed fetch and update a feed if necessary. // RefreshFeed fetch and update a feed if necessary.
func (h *Handler) RefreshFeed(userID, feedID int64) error { func (h *Handler) RefreshFeed(userID, feedID int64) error {
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:RefreshFeed] feedID=%d", feedID)) defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:RefreshFeed] feedID=%d", feedID))
userLanguage, err := h.store.UserLanguage(userID) userLanguage := h.store.UserLanguage(userID)
if err != nil {
logger.Error("[Handler:RefreshFeed] %v", err)
userLanguage = "en_US"
}
printer := locale.NewPrinter(userLanguage) printer := locale.NewPrinter(userLanguage)
originalFeed, err := h.store.FeedByID(userID, feedID) originalFeed, storeErr := h.store.FeedByID(userID, feedID)
if err != nil { if storeErr != nil {
return err return storeErr
} }
if originalFeed == nil { if originalFeed == nil {
return errors.NewLocalizedError(errNotFound, feedID) return errors.NewLocalizedError(errNotFound, feedID)
} }
clt := client.New(originalFeed.FeedURL) originalFeed.CheckedNow()
clt.WithCredentials(originalFeed.Username, originalFeed.Password)
clt.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
clt.WithUserAgent(originalFeed.UserAgent)
response, err := clt.Get()
if err != nil {
var customErr errors.LocalizedError
if lerr, ok := err.(*errors.LocalizedError); ok {
customErr = *lerr
} else {
customErr = *errors.NewLocalizedError(errRequestFailed, err)
}
originalFeed.ParsingErrorCount++ request := client.New(originalFeed.FeedURL)
originalFeed.ParsingErrorMsg = customErr.Localize(printer) request.WithCredentials(originalFeed.Username, originalFeed.Password)
request.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
request.WithUserAgent(originalFeed.UserAgent)
response, requestErr := browser.Exec(request)
if requestErr != nil {
originalFeed.WithError(requestErr.Localize(printer))
h.store.UpdateFeed(originalFeed) h.store.UpdateFeed(originalFeed)
return customErr return requestErr
}
originalFeed.CheckedAt = time.Now()
if response.IsNotFound() {
err := errors.NewLocalizedError(errResourceNotFound)
originalFeed.ParsingErrorCount++
originalFeed.ParsingErrorMsg = err.Localize(printer)
h.store.UpdateFeed(originalFeed)
return err
}
if response.HasServerFailure() {
err := errors.NewLocalizedError(errServerFailure, response.StatusCode)
originalFeed.ParsingErrorCount++
originalFeed.ParsingErrorMsg = err.Localize(printer)
h.store.UpdateFeed(originalFeed)
return err
} }
if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) { if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID) logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID)
// Content-Length = -1 when no Content-Length header is sent
if response.ContentLength == 0 {
err := errors.NewLocalizedError(errEmptyFeed)
originalFeed.ParsingErrorCount++
originalFeed.ParsingErrorMsg = err.Localize(printer)
h.store.UpdateFeed(originalFeed)
return err
}
if err := response.EnsureUnicodeBody(); err != nil {
return errors.NewLocalizedError(errEncoding, err)
}
subscription, parseErr := parser.ParseFeed(response.String()) subscription, parseErr := parser.ParseFeed(response.String())
if parseErr != nil { if parseErr != nil {
originalFeed.ParsingErrorCount++ originalFeed.WithError(parseErr.Localize(printer))
originalFeed.ParsingErrorMsg = parseErr.Localize(printer)
h.store.UpdateFeed(originalFeed) h.store.UpdateFeed(originalFeed)
return err return parseErr
} }
feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription) feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription)
@ -202,34 +122,20 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
feedProcessor.WithCrawler(originalFeed.Crawler) feedProcessor.WithCrawler(originalFeed.Crawler)
feedProcessor.Process() feedProcessor.Process()
originalFeed.EtagHeader = response.ETag
originalFeed.LastModifiedHeader = response.LastModified
// Note: We don't update existing entries when the crawler is enabled (we crawl only inexisting entries). // Note: We don't update existing entries when the crawler is enabled (we crawl only inexisting entries).
if err := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, subscription.Entries, !originalFeed.Crawler); err != nil { if storeErr := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, subscription.Entries, !originalFeed.Crawler); storeErr != nil {
return err return storeErr
} }
if !h.store.HasIcon(originalFeed.ID) { // We update caching headers only if the feed has been modified,
logger.Debug("[Handler:RefreshFeed] Looking for feed icon") // because some websites don't return the same headers when replying with a 304.
icon, err := icon.FindIcon(originalFeed.SiteURL) originalFeed.WithClientResponse(response)
if err != nil { checkFeedIcon(h.store, originalFeed.ID, originalFeed.SiteURL)
logger.Debug("[Handler:RefreshFeed] %v", err)
} else {
h.store.CreateFeedIcon(originalFeed, icon)
}
}
} else { } else {
logger.Debug("[Handler:RefreshFeed] Feed #%d not modified", feedID) logger.Debug("[Handler:RefreshFeed] Feed #%d not modified", feedID)
} }
originalFeed.ParsingErrorCount = 0 originalFeed.ResetErrorCounter()
originalFeed.ParsingErrorMsg = ""
if originalFeed.SiteURL == "" {
originalFeed.SiteURL = originalFeed.FeedURL
}
return h.store.UpdateFeed(originalFeed) return h.store.UpdateFeed(originalFeed)
} }
@ -237,3 +143,18 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
func NewFeedHandler(store *storage.Storage) *Handler { func NewFeedHandler(store *storage.Storage) *Handler {
return &Handler{store} return &Handler{store}
} }
func checkFeedIcon(store *storage.Storage, feedID int64, websiteURL string) {
if !store.HasIcon(feedID) {
icon, err := icon.FindIcon(websiteURL)
if err != nil {
logger.Error("CheckFeedIcon: %v (feedID=%d websiteURL=%s)", err, feedID, websiteURL)
} else if icon == nil {
logger.Debug("CheckFeedIcon: No icon found (feedID=%d websiteURL=%s)", feedID, websiteURL)
} else {
if err := store.CreateFeedIcon(feedID, icon); err != nil {
logger.Error("CheckFeedIcon: %v (feedID=%d websiteURL=%s)", err, feedID, websiteURL)
}
}
}
}

View file

@ -5,58 +5,29 @@
package subscription // import "miniflux.app/reader/subscription" package subscription // import "miniflux.app/reader/subscription"
import ( import (
"fmt"
"io" "io"
"strings" "strings"
"time"
"miniflux.app/errors" "miniflux.app/errors"
"miniflux.app/http/client" "miniflux.app/http/client"
"miniflux.app/logger" "miniflux.app/reader/browser"
"miniflux.app/reader/parser" "miniflux.app/reader/parser"
"miniflux.app/timer"
"miniflux.app/url" "miniflux.app/url"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
) )
var ( var (
errConnectionFailure = "Unable to open this link: %v"
errUnreadableDoc = "Unable to analyze this page: %v" errUnreadableDoc = "Unable to analyze this page: %v"
errEmptyBody = "This web page is empty"
errNotAuthorized = "You are not authorized to access this resource (invalid username/password)"
errServerFailure = "Unable to fetch this resource (Status Code = %d)"
) )
// FindSubscriptions downloads and try to find one or more subscriptions from an URL. // FindSubscriptions downloads and try to find one or more subscriptions from an URL.
func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, error) { func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, *errors.LocalizedError) {
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[FindSubscriptions] url=%s", websiteURL)) request := client.New(websiteURL)
request.WithCredentials(username, password)
clt := client.New(websiteURL) request.WithUserAgent(userAgent)
clt.WithCredentials(username, password) response, err := browser.Exec(request)
clt.WithUserAgent(userAgent)
response, err := clt.Get()
if err != nil { if err != nil {
if _, ok := err.(errors.LocalizedError); ok {
return nil, err
}
return nil, errors.NewLocalizedError(errConnectionFailure, err)
}
if response.IsNotAuthorized() {
return nil, errors.NewLocalizedError(errNotAuthorized)
}
if response.HasServerFailure() {
return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode)
}
// Content-Length = -1 when no Content-Length header is sent
if response.ContentLength == 0 {
return nil, errors.NewLocalizedError(errEmptyBody)
}
if err := response.EnsureUnicodeBody(); err != nil {
return nil, err return nil, err
} }
@ -75,7 +46,7 @@ func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscr
return parseDocument(response.EffectiveURL, strings.NewReader(body)) return parseDocument(response.EffectiveURL, strings.NewReader(body))
} }
func parseDocument(websiteURL string, data io.Reader) (Subscriptions, error) { func parseDocument(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) {
var subscriptions Subscriptions var subscriptions Subscriptions
queries := map[string]string{ queries := map[string]string{
"link[type='application/rss+xml']": "rss", "link[type='application/rss+xml']": "rss",
@ -108,7 +79,6 @@ func parseDocument(websiteURL string, data io.Reader) (Subscriptions, error) {
} }
if subscription.URL != "" { if subscription.URL != "" {
logger.Debug("[FindSubscriptions] %s", subscription)
subscriptions = append(subscriptions, subscription) subscriptions = append(subscriptions, subscription)
} }
}) })

View file

@ -100,8 +100,8 @@ func (s *Storage) CreateIcon(icon *model.Icon) error {
} }
// CreateFeedIcon creates an icon and associate the icon to the given feed. // CreateFeedIcon creates an icon and associate the icon to the given feed.
func (s *Storage) CreateFeedIcon(feed *model.Feed, icon *model.Icon) error { func (s *Storage) CreateFeedIcon(feedID int64, icon *model.Icon) error {
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Storage:CreateFeedIcon] feedID=%d", feed.ID)) defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Storage:CreateFeedIcon] feedID=%d", feedID))
err := s.IconByHash(icon) err := s.IconByHash(icon)
if err != nil { if err != nil {
@ -115,7 +115,7 @@ func (s *Storage) CreateFeedIcon(feed *model.Feed, icon *model.Icon) error {
} }
} }
_, err = s.db.Exec(`INSERT INTO feed_icons (feed_id, icon_id) VALUES ($1, $2)`, feed.ID, icon.ID) _, err = s.db.Exec(`INSERT INTO feed_icons (feed_id, icon_id) VALUES ($1, $2)`, feedID, icon.ID)
if err != nil { if err != nil {
return fmt.Errorf("unable to create feed icon: %v", err) return fmt.Errorf("unable to create feed icon: %v", err)
} }

View file

@ -175,15 +175,14 @@ func (s *Storage) UpdateUser(user *model.User) error {
} }
// UserLanguage returns the language of the given user. // UserLanguage returns the language of the given user.
func (s *Storage) UserLanguage(userID int64) (language string, err error) { func (s *Storage) UserLanguage(userID int64) (language string) {
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Storage:UserLanguage] userID=%d", userID)) defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Storage:UserLanguage] userID=%d", userID))
err = s.db.QueryRow(`SELECT language FROM users WHERE id = $1`, userID).Scan(&language) err := s.db.QueryRow(`SELECT language FROM users WHERE id = $1`, userID).Scan(&language)
if err == sql.ErrNoRows { if err != nil {
return "en_US", nil return "en_US"
} else if err != nil {
return "", fmt.Errorf("unable to fetch user language: %v", err)
} }
return language, nil
return language
} }
// UserByID finds a user by the ID. // UserByID finds a user by the ID.

View file

@ -50,16 +50,16 @@ func (c *Controller) SubmitSubscription(w http.ResponseWriter, r *http.Request)
return return
} }
subscriptions, err := subscription.FindSubscriptions( subscriptions, findErr := subscription.FindSubscriptions(
subscriptionForm.URL, subscriptionForm.URL,
subscriptionForm.UserAgent, subscriptionForm.UserAgent,
subscriptionForm.Username, subscriptionForm.Username,
subscriptionForm.Password, subscriptionForm.Password,
) )
if err != nil { if findErr != nil {
logger.Error("[Controller:SubmitSubscription] %v", err) logger.Error("[UI:SubmitSubscription] %s", findErr)
v.Set("form", subscriptionForm) v.Set("form", subscriptionForm)
v.Set("errorMessage", err) v.Set("errorMessage", findErr)
html.OK(w, r, v.Render("add_subscription")) html.OK(w, r, v.Render("add_subscription"))
return return
} }