1
0
Fork 0

Use custom feed user agent to fetch website icon

This commit is contained in:
Frédéric Guillot 2022-01-08 15:09:12 -08:00
parent 8329e9b46c
commit 2309b27458
3 changed files with 21 additions and 12 deletions

View file

@ -90,6 +90,7 @@ func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model
store, store,
subscription.ID, subscription.ID,
subscription.SiteURL, subscription.SiteURL,
feedCreationRequest.UserAgent,
feedCreationRequest.FetchViaProxy, feedCreationRequest.FetchViaProxy,
feedCreationRequest.AllowSelfSignedCertificates, feedCreationRequest.AllowSelfSignedCertificates,
) )
@ -178,6 +179,7 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64) error {
store, store,
originalFeed.ID, originalFeed.ID,
originalFeed.SiteURL, originalFeed.SiteURL,
originalFeed.UserAgent,
originalFeed.FetchViaProxy, originalFeed.FetchViaProxy,
originalFeed.AllowSelfSignedCertificates, originalFeed.AllowSelfSignedCertificates,
) )
@ -196,9 +198,9 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64) error {
return nil return nil
} }
func checkFeedIcon(store *storage.Storage, feedID int64, websiteURL string, fetchViaProxy, allowSelfSignedCertificates bool) { func checkFeedIcon(store *storage.Storage, feedID int64, websiteURL, userAgent string, fetchViaProxy, allowSelfSignedCertificates bool) {
if !store.HasIcon(feedID) { if !store.HasIcon(feedID) {
icon, err := icon.FindIcon(websiteURL, fetchViaProxy, allowSelfSignedCertificates) icon, err := icon.FindIcon(websiteURL, userAgent, fetchViaProxy, allowSelfSignedCertificates)
if err != nil { if err != nil {
logger.Debug(`[CheckFeedIcon] %v (feedID=%d websiteURL=%s)`, err, feedID, websiteURL) logger.Debug(`[CheckFeedIcon] %v (feedID=%d websiteURL=%s)`, err, feedID, websiteURL)
} else if icon == nil { } else if icon == nil {

View file

@ -21,21 +21,25 @@ import (
) )
// FindIcon tries to find the website's icon. // FindIcon tries to find the website's icon.
func FindIcon(websiteURL string, fetchViaProxy, allowSelfSignedCertificates bool) (*model.Icon, error) { func FindIcon(websiteURL, userAgent string, fetchViaProxy, allowSelfSignedCertificates bool) (*model.Icon, error) {
rootURL := url.RootURL(websiteURL) rootURL := url.RootURL(websiteURL)
logger.Debug("[FindIcon] Trying to find an icon: rootURL=%q websiteURL=%q userAgent=%q", rootURL, websiteURL, userAgent)
clt := client.NewClientWithConfig(rootURL, config.Opts) clt := client.NewClientWithConfig(rootURL, config.Opts)
clt.WithUserAgent(userAgent)
clt.AllowSelfSignedCertificates = allowSelfSignedCertificates clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
if fetchViaProxy { if fetchViaProxy {
clt.WithProxy() clt.WithProxy()
} }
response, err := clt.Get() response, err := clt.Get()
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to download website index page: %v", err) return nil, fmt.Errorf("icon: unable to download website index page: %v", err)
} }
if response.HasServerFailure() { if response.HasServerFailure() {
return nil, fmt.Errorf("unable to download website index page: status=%d", response.StatusCode) return nil, fmt.Errorf("icon: unable to download website index page: status=%d", response.StatusCode)
} }
iconURL, err := parseDocument(rootURL, response.Body) iconURL, err := parseDocument(rootURL, response.Body)
@ -48,7 +52,7 @@ func FindIcon(websiteURL string, fetchViaProxy, allowSelfSignedCertificates bool
} }
logger.Debug("[FindIcon] Fetching icon => %s", iconURL) logger.Debug("[FindIcon] Fetching icon => %s", iconURL)
icon, err := downloadIcon(iconURL, fetchViaProxy, allowSelfSignedCertificates) icon, err := downloadIcon(iconURL, userAgent, fetchViaProxy, allowSelfSignedCertificates)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -66,7 +70,7 @@ func parseDocument(websiteURL string, data io.Reader) (string, error) {
doc, err := goquery.NewDocumentFromReader(data) doc, err := goquery.NewDocumentFromReader(data)
if err != nil { if err != nil {
return "", fmt.Errorf("unable to read document: %v", err) return "", fmt.Errorf("icon: unable to read document: %v", err)
} }
var iconURL string var iconURL string
@ -91,28 +95,30 @@ func parseDocument(websiteURL string, data io.Reader) (string, error) {
return iconURL, nil return iconURL, nil
} }
func downloadIcon(iconURL string, fetchViaProxy, allowSelfSignedCertificates bool) (*model.Icon, error) { func downloadIcon(iconURL, userAgent string, fetchViaProxy, allowSelfSignedCertificates bool) (*model.Icon, error) {
clt := client.NewClientWithConfig(iconURL, config.Opts) clt := client.NewClientWithConfig(iconURL, config.Opts)
clt.WithUserAgent(userAgent)
clt.AllowSelfSignedCertificates = allowSelfSignedCertificates clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
if fetchViaProxy { if fetchViaProxy {
clt.WithProxy() clt.WithProxy()
} }
response, err := clt.Get() response, err := clt.Get()
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to download iconURL: %v", err) return nil, fmt.Errorf("icon: unable to download iconURL: %v", err)
} }
if response.HasServerFailure() { if response.HasServerFailure() {
return nil, fmt.Errorf("unable to download icon: status=%d", response.StatusCode) return nil, fmt.Errorf("icon: unable to download icon: status=%d", response.StatusCode)
} }
body, err := io.ReadAll(response.Body) body, err := io.ReadAll(response.Body)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to read downloaded icon: %v", err) return nil, fmt.Errorf("icon: unable to read downloaded icon: %v", err)
} }
if len(body) == 0 { if len(body) == 0 {
return nil, fmt.Errorf("downloaded icon is empty, iconURL=%s", iconURL) return nil, fmt.Errorf("icon: downloaded icon is empty, iconURL=%s", iconURL)
} }
icon := &model.Icon{ icon := &model.Icon{

View file

@ -60,6 +60,7 @@ func (h *handler) imageProxy(w http.ResponseWriter, r *http.Request) {
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
logger.Error(`[Proxy] code=%d url=%q`, resp.StatusCode, imageURL)
html.NotFound(w, r) html.NotFound(w, r)
return return
} }