1
0
Fork 0
miniflux/http/client/response.go
Frédéric Guillot 3debf75eb9 Normalize URL query string before executing HTTP requests
- Make sure query string parameters are encoded
- As opposed to the standard library, do not append equal sign
for query parameters with empty value
- Strip URL fragments like Web browsers
2019-12-26 15:56:59 -08:00

127 lines
3.1 KiB
Go

// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
package client // import "miniflux.app/http/client"
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"regexp"
"strings"
"unicode/utf8"
"golang.org/x/net/html/charset"
)
// xmlEncodingRegex matches an XML declaration that carries an explicit
// encoding attribute, for example <?xml version="1.0" encoding="ISO-8859-1"?>.
// It is compiled once at package initialization.
var (
	xmlEncodingRegex = regexp.MustCompile(`<\?xml(.*)encoding=["'](.+)["'](.*)\?>`)
)
// Response wraps a server response.
//
// It carries the body reader together with the metadata that the helper
// methods of this type inspect (status code, cache validators, content type).
type Response struct {
// Body is the reader for the response payload. EnsureUnicodeBody may
// replace it with a buffered or charset-converting reader, and
// BodyAsString consumes it.
Body io.Reader
// StatusCode is the numeric status that IsNotFound, IsNotAuthorized,
// HasServerFailure and IsModified compare against HTTP status codes.
StatusCode int
// EffectiveURL is only reported by String in this file.
// NOTE(review): presumably the final URL after redirects — confirm
// against the code that populates it.
EffectiveURL string
// LastModified is compared by IsModified with the caller-supplied value.
// NOTE(review): presumably the Last-Modified response header — confirm.
LastModified string
// ETag is compared by IsModified with the caller-supplied value.
// NOTE(review): presumably the ETag response header — confirm.
ETag string
// Expires is only reported by String in this file.
// NOTE(review): presumably the Expires response header — confirm.
Expires string
// ContentType is inspected by EnsureUnicodeBody (it looks for the "json"
// and "xml" substrings) and is handed to the charset detection.
ContentType string
// ContentLength is only reported by String in this file.
// NOTE(review): presumably the body size in bytes — confirm.
ContentLength int64
}
// String renders the response metadata as a single line of key=value pairs.
// The body is not part of the output.
func (r *Response) String() string {
	const layout = `StatusCode=%d EffectiveURL=%q LastModified=%q ETag=%s Expires=%s ContentType=%q ContentLength=%d`

	// Values are listed in the same order as the verbs of the layout.
	values := []interface{}{
		r.StatusCode,
		r.EffectiveURL,
		r.LastModified,
		r.ETag,
		r.Expires,
		r.ContentType,
		r.ContentLength,
	}
	return fmt.Sprintf(layout, values...)
}
// IsNotFound reports whether the status code says the resource does not
// exist anymore: 404 (Not Found) or 410 (Gone).
func (r *Response) IsNotFound() bool {
	switch r.StatusCode {
	case 404, 410:
		return true
	default:
		return false
	}
}
// IsNotAuthorized reports whether the resource requires authentication,
// i.e. the status code is 401 (Unauthorized).
func (r *Response) IsNotAuthorized() bool {
	const statusUnauthorized = 401
	return r.StatusCode == statusUnauthorized
}
// HasServerFailure reports whether the status code represents a failure.
//
// Despite the name, every status code from 400 upwards counts, so client
// errors (4xx) are reported as failures as well as server errors (5xx).
func (r *Response) HasServerFailure() bool {
	const firstFailureStatus = 400
	return r.StatusCode >= firstFailureStatus
}
// IsModified reports whether the resource changed compared to the validators
// the caller already knows.
//
// The resource counts as unchanged when the status code is 304, when the
// response has a non-empty ETag equal to etag, or when it has a non-empty
// LastModified equal to lastModified. In every other case it is considered
// modified.
func (r *Response) IsModified(etag, lastModified string) bool {
	notModifiedStatus := r.StatusCode == 304
	// An empty validator on the response never counts as a match.
	sameETag := r.ETag != "" && r.ETag == etag
	sameLastModified := r.LastModified != "" && r.LastModified == lastModified

	return !(notModifiedStatus || sameETag || sameLastModified)
}
// EnsureUnicodeBody makes sure the body is encoded in UTF-8.
//
// If a charset other than UTF-8 is detected, we convert the document to UTF-8.
// This is used by the scraper and feed readers.
//
// Do not forget edge cases:
//
// - Feeds with encoding specified only in Content-Type header and not in XML document
// - Feeds with encoding specified in both places
// - Feeds with encoding specified only in XML document and not in HTTP header
// - Feeds with wrong encoding defined and already in UTF-8
//
// For XML content types the whole body is read into memory and Body is
// replaced by a reader over that buffer. An error is returned when reading
// the body fails or when the charset reader cannot be created.
func (r *Response) EnsureUnicodeBody() (err error) {
	// JSON feeds are always in UTF-8.
	if strings.Contains(r.ContentType, "json") {
		return nil
	}

	if strings.Contains(r.ContentType, "xml") {
		buffer, readErr := ioutil.ReadAll(r.Body)
		// The original reader has been consumed, so expose what was read
		// (possibly partial) before reporting a failure.
		r.Body = bytes.NewReader(buffer)
		if readErr != nil {
			return fmt.Errorf("reading response body: %w", readErr)
		}

		// We ignore documents with encoding specified in XML prolog.
		// This is going to be handled by the XML parser.
		// Only the beginning of the document is scanned for the prolog.
		const prologScanLimit = 1024
		prolog := buffer
		if len(prolog) > prologScanLimit {
			prolog = prolog[:prologScanLimit]
		}
		if xmlEncodingRegex.Match(prolog) {
			return nil
		}

		// If no encoding is specified in the XML prolog and
		// the document is valid UTF-8, nothing needs to be done.
		if utf8.Valid(buffer) {
			return nil
		}
	}

	// Everything else goes through charset detection based on the content
	// and the Content-Type value.
	r.Body, err = charset.NewReader(r.Body, r.ContentType)
	return err
}
// BodyAsString returns the response body as string.
//
// The body reader is consumed by this call, so calling it again on the same
// reader yields whatever is left (usually nothing). A nil Body yields an
// empty string. Since the signature has no error result, a read failure is
// not reported: the data read before the failure is returned.
func (r *Response) BodyAsString() string {
	if r.Body == nil {
		return ""
	}

	// Best effort by design: the read error is intentionally dropped.
	content, _ := ioutil.ReadAll(r.Body)
	return string(content)
}