Small refactoring of internal/reader/date/parser.go
- Split the date formats into those that require local times and those that don't, so that there is no need for a switch-case inside the for loop, which runs around 250 iterations at most. - Be stricter about timezones: previously, invalid offsets like -13 were accepted. Also add a test for this. - Bail out early if the date is an empty string.
This commit is contained in:
parent
21da7f77f5
commit
040938ff6d
2 changed files with 33 additions and 29 deletions
|
@ -6,22 +6,25 @@ package date // import "miniflux.app/v2/internal/reader/date"
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
// DateFormats taken from github.com/mjibson/goread
|
// RFC822, RFC850, and RFC1123 formats should be applied only to local times.
|
||||||
var dateFormats = []string{
|
var dateFormatsLocalTimesOnly = []string{
|
||||||
time.RFC822, // RSS
|
time.RFC822, // RSS
|
||||||
|
time.RFC850,
|
||||||
|
time.RFC1123,
|
||||||
|
}
|
||||||
|
|
||||||
|
// dateFormats taken from github.com/mjibson/goread
|
||||||
|
var dateFormats = []string{
|
||||||
time.RFC822Z, // RSS
|
time.RFC822Z, // RSS
|
||||||
time.RFC3339, // Atom
|
time.RFC3339, // Atom
|
||||||
time.UnixDate,
|
time.UnixDate,
|
||||||
time.RubyDate,
|
time.RubyDate,
|
||||||
time.RFC850,
|
|
||||||
time.RFC1123Z,
|
time.RFC1123Z,
|
||||||
time.RFC1123,
|
|
||||||
time.ANSIC,
|
time.ANSIC,
|
||||||
"Mon, 02 Jan 2006 15:04:05 MST -07:00",
|
"Mon, 02 Jan 2006 15:04:05 MST -07:00",
|
||||||
"Mon, January 2, 2006, 3:04 PM MST",
|
"Mon, January 2, 2006, 3:04 PM MST",
|
||||||
|
@ -314,34 +317,30 @@ var invalidLocalizedDateReplacer = strings.NewReplacer(
|
||||||
// list of commonly found feed date formats.
|
// list of commonly found feed date formats.
|
||||||
func Parse(rawInput string) (t time.Time, err error) {
|
func Parse(rawInput string) (t time.Time, err error) {
|
||||||
rawInput = strings.TrimSpace(rawInput)
|
rawInput = strings.TrimSpace(rawInput)
|
||||||
timestamp, err := strconv.ParseInt(rawInput, 10, 64)
|
if rawInput == "" {
|
||||||
if err == nil {
|
return t, errors.New(`date parser: empty value`)
|
||||||
|
}
|
||||||
|
|
||||||
|
if timestamp, err := strconv.ParseInt(rawInput, 10, 64); err == nil {
|
||||||
return time.Unix(timestamp, 0), nil
|
return time.Unix(timestamp, 0), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
processedInput := invalidLocalizedDateReplacer.Replace(rawInput)
|
processedInput := invalidLocalizedDateReplacer.Replace(rawInput)
|
||||||
processedInput = invalidTimezoneReplacer.Replace(processedInput)
|
processedInput = invalidTimezoneReplacer.Replace(processedInput)
|
||||||
if processedInput == "" {
|
|
||||||
return t, errors.New(`date parser: empty value`)
|
for _, layout := range dateFormatsLocalTimesOnly {
|
||||||
|
if t, err = parseLocalTimeDates(layout, processedInput); err == nil {
|
||||||
|
return checkTimezoneRange(t), nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, layout := range dateFormats {
|
for _, layout := range dateFormats {
|
||||||
switch layout {
|
|
||||||
case time.RFC822, time.RFC850, time.RFC1123:
|
|
||||||
if t, err = parseLocalTimeDates(layout, processedInput); err == nil {
|
|
||||||
t = checkTimezoneRange(t)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if t, err = time.Parse(layout, processedInput); err == nil {
|
if t, err = time.Parse(layout, processedInput); err == nil {
|
||||||
t = checkTimezoneRange(t)
|
return checkTimezoneRange(t), nil
|
||||||
return
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
err = fmt.Errorf(`date parser: failed to parse date "%s"`, rawInput)
|
return t, fmt.Errorf(`date parser: failed to parse date "%s"`, rawInput)
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// According to Golang documentation:
|
// According to Golang documentation:
|
||||||
|
@ -369,7 +368,7 @@ func parseLocalTimeDates(layout, ds string) (t time.Time, err error) {
|
||||||
// Avoid "pq: time zone displacement out of range" errors
|
// Avoid "pq: time zone displacement out of range" errors
|
||||||
func checkTimezoneRange(t time.Time) time.Time {
|
func checkTimezoneRange(t time.Time) time.Time {
|
||||||
_, offset := t.Zone()
|
_, offset := t.Zone()
|
||||||
if math.Abs(float64(offset)) > 14*60*60 {
|
if float64(offset) > 14*60*60 || float64(offset) < -12*60*60 {
|
||||||
t = t.UTC()
|
t = t.UTC()
|
||||||
}
|
}
|
||||||
return t
|
return t
|
||||||
|
|
|
@ -236,14 +236,19 @@ func TestParseWeirdDateFormat(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseDateWithTimezoneOutOfRange(t *testing.T) {
|
func TestParseDateWithTimezoneOutOfRange(t *testing.T) {
|
||||||
date, err := Parse("2023-05-29 00:00:00-23:00")
|
inputs := []string{
|
||||||
|
"2023-05-29 00:00:00-13:00",
|
||||||
|
"2023-05-29 00:00:00+15:00",
|
||||||
|
}
|
||||||
|
for _, input := range inputs {
|
||||||
|
date, err := Parse(input)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf(`Unable to parse date: %v`, err)
|
t.Errorf(`Unable to parse date: %v`, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, offset := date.Zone()
|
if _, offset := date.Zone(); offset != 0 {
|
||||||
if offset != 0 {
|
|
||||||
t.Errorf(`The offset should be reinitialized to 0 instead of %v because it's out of range`, offset)
|
t.Errorf(`The offset should be reinitialized to 0 instead of %v because it's out of range`, offset)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue