2017-11-20 00:10:04 -05:00
|
|
|
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
|
|
|
// Use of this source code is governed by the Apache 2.0
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
2018-08-25 00:51:50 -04:00
|
|
|
package client // import "miniflux.app/http/client"
|
2017-11-20 00:10:04 -05:00
|
|
|
|
2017-11-22 01:55:19 -05:00
|
|
|
import (
|
2018-10-30 02:00:03 -04:00
|
|
|
"bytes"
|
2019-12-26 18:26:23 -05:00
|
|
|
"fmt"
|
2017-11-22 01:55:19 -05:00
|
|
|
"io"
|
2018-10-30 02:00:03 -04:00
|
|
|
"regexp"
|
2017-11-22 01:55:19 -05:00
|
|
|
"strings"
|
2019-01-01 16:06:58 -05:00
|
|
|
"unicode/utf8"
|
2017-11-22 01:55:19 -05:00
|
|
|
|
|
|
|
"golang.org/x/net/html/charset"
|
|
|
|
)
|
2017-11-20 00:10:04 -05:00
|
|
|
|
2018-12-13 00:13:06 -05:00
|
|
|
// xmlEncodingRegex matches an XML declaration that carries an explicit
// encoding attribute, for example: <?xml version="1.0" encoding="ISO-8859-1"?>
// EnsureUnicodeBody runs it against the beginning of XML documents to decide
// whether charset conversion should be skipped.
var xmlEncodingRegex = regexp.MustCompile(`<\?xml(.*)encoding=["'](.+)["'](.*)\?>`)
|
2018-10-30 02:00:03 -04:00
|
|
|
|
2017-11-20 20:12:37 -05:00
|
|
|
// Response wraps a server response.
//
// Field order is kept stable so that existing unkeyed literals keep compiling.
type Response struct {
	// Body is the response payload. EnsureUnicodeBody replaces it with a
	// reader over the buffered (and possibly transcoded) content.
	Body io.Reader

	// StatusCode is the HTTP status code of the response.
	StatusCode int

	// EffectiveURL is presumably the final URL after redirects
	// (TODO confirm against the code that fills it in).
	EffectiveURL string

	// LastModified and ETag are the cache validators that IsModified
	// compares with the values supplied by the caller.
	LastModified string
	ETag         string

	// Expires is presumably the raw Expires header value (TODO confirm).
	Expires string

	// ContentType is checked for "xml" and passed to the charset detector
	// by EnsureUnicodeBody.
	ContentType string

	// ContentLength is presumably the declared body size in bytes
	// (TODO confirm).
	ContentLength int64
}
|
|
|
|
|
2019-12-26 18:26:23 -05:00
|
|
|
func (r *Response) String() string {
|
|
|
|
return fmt.Sprintf(
|
|
|
|
`StatusCode=%d EffectiveURL=%q LastModified=%q ETag=%s Expires=%s ContentType=%q ContentLength=%d`,
|
|
|
|
r.StatusCode,
|
|
|
|
r.EffectiveURL,
|
|
|
|
r.LastModified,
|
|
|
|
r.ETag,
|
|
|
|
r.Expires,
|
|
|
|
r.ContentType,
|
|
|
|
r.ContentLength,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2018-06-30 15:42:12 -04:00
|
|
|
// IsNotFound returns true if the resource doesn't exists anymore.
|
|
|
|
func (r *Response) IsNotFound() bool {
|
|
|
|
return r.StatusCode == 404 || r.StatusCode == 410
|
|
|
|
}
|
|
|
|
|
|
|
|
// IsNotAuthorized returns true if the resource require authentication.
|
|
|
|
func (r *Response) IsNotAuthorized() bool {
|
|
|
|
return r.StatusCode == 401
|
|
|
|
}
|
|
|
|
|
2017-11-20 20:12:37 -05:00
|
|
|
// HasServerFailure returns true if the status code represents a failure.
|
|
|
|
func (r *Response) HasServerFailure() bool {
|
|
|
|
return r.StatusCode >= 400
|
2017-11-20 00:10:04 -05:00
|
|
|
}
|
|
|
|
|
2017-11-20 20:12:37 -05:00
|
|
|
// IsModified returns true if the resource has been modified.
|
|
|
|
func (r *Response) IsModified(etag, lastModified string) bool {
|
|
|
|
if r.StatusCode == 304 {
|
2017-11-20 00:10:04 -05:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2017-11-20 20:25:45 -05:00
|
|
|
if r.ETag != "" && r.ETag == etag {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
if r.LastModified != "" && r.LastModified == lastModified {
|
2017-11-20 00:10:04 -05:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
2017-11-20 20:12:37 -05:00
|
|
|
|
2018-10-14 14:46:41 -04:00
|
|
|
// EnsureUnicodeBody makes sure the body is encoded in UTF-8.
|
2018-01-20 01:42:55 -05:00
|
|
|
//
|
|
|
|
// If a charset other than UTF-8 is detected, we convert the document to UTF-8.
|
|
|
|
// This is used by the scraper and feed readers.
|
|
|
|
//
|
|
|
|
// Do not forget edge cases:
|
2018-10-30 02:00:03 -04:00
|
|
|
//
|
|
|
|
// - Feeds with encoding specified only in Content-Type header and not in XML document
|
|
|
|
// - Feeds with encoding specified in both places
|
|
|
|
// - Feeds with encoding specified only in XML document and not in HTTP header
|
|
|
|
// - Feeds with wrong encoding defined and already in UTF-8
|
|
|
|
func (r *Response) EnsureUnicodeBody() (err error) {
|
2021-02-17 00:19:03 -05:00
|
|
|
buffer, err := io.ReadAll(r.Body)
|
2020-10-31 01:46:43 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
r.Body = bytes.NewReader(buffer)
|
|
|
|
if utf8.Valid(buffer) {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if strings.Contains(r.ContentType, "xml") {
|
|
|
|
// We ignore documents with encoding specified in XML prolog.
|
|
|
|
// This is going to be handled by the XML parser.
|
|
|
|
length := 1024
|
|
|
|
if len(buffer) < 1024 {
|
|
|
|
length = len(buffer)
|
2018-12-13 00:37:39 -05:00
|
|
|
}
|
|
|
|
|
2020-10-31 01:46:43 -04:00
|
|
|
if xmlEncodingRegex.Match(buffer[0:length]) {
|
|
|
|
return nil
|
2018-01-20 01:42:55 -05:00
|
|
|
}
|
2017-11-22 01:55:19 -05:00
|
|
|
}
|
2018-10-30 02:00:03 -04:00
|
|
|
|
|
|
|
r.Body, err = charset.NewReader(r.Body, r.ContentType)
|
|
|
|
return err
|
2018-10-14 14:46:41 -04:00
|
|
|
}
|
|
|
|
|
2019-12-26 18:26:23 -05:00
|
|
|
// BodyAsString returns the response body as string.
|
|
|
|
func (r *Response) BodyAsString() string {
|
2021-02-17 00:19:03 -05:00
|
|
|
bytes, _ := io.ReadAll(r.Body)
|
2018-10-14 14:46:41 -04:00
|
|
|
return string(bytes)
|
2017-11-20 20:12:37 -05:00
|
|
|
}
|