2023-06-19 17:42:47 -04:00
|
|
|
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0
|
2019-12-23 01:18:21 -05:00
|
|
|
|
2023-08-10 00:15:55 -04:00
|
|
|
package atom // import "miniflux.app/v2/reader/atom"
|
2019-12-23 01:18:21 -05:00
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/base64"
|
|
|
|
"html"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
2023-08-10 00:15:55 -04:00
|
|
|
"miniflux.app/v2/crypto"
|
|
|
|
"miniflux.app/v2/logger"
|
|
|
|
"miniflux.app/v2/model"
|
|
|
|
"miniflux.app/v2/reader/date"
|
|
|
|
"miniflux.app/v2/reader/sanitizer"
|
|
|
|
"miniflux.app/v2/url"
|
2019-12-23 01:18:21 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
// Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
|
|
|
|
type atom03Feed struct {
|
|
|
|
ID string `xml:"id"`
|
|
|
|
Title atom03Text `xml:"title"`
|
|
|
|
Author atomPerson `xml:"author"`
|
|
|
|
Links atomLinks `xml:"link"`
|
|
|
|
Entries []atom03Entry `xml:"entry"`
|
|
|
|
}
|
|
|
|
|
2020-12-02 23:47:11 -05:00
|
|
|
func (a *atom03Feed) Transform(baseURL string) *model.Feed {
|
|
|
|
var err error
|
|
|
|
|
2019-12-23 01:18:21 -05:00
|
|
|
feed := new(model.Feed)
|
|
|
|
|
2020-12-02 23:47:11 -05:00
|
|
|
feedURL := a.Links.firstLinkWithRelation("self")
|
|
|
|
feed.FeedURL, err = url.AbsoluteURL(baseURL, feedURL)
|
|
|
|
if err != nil {
|
|
|
|
feed.FeedURL = feedURL
|
|
|
|
}
|
|
|
|
|
|
|
|
siteURL := a.Links.originalLink()
|
|
|
|
feed.SiteURL, err = url.AbsoluteURL(baseURL, siteURL)
|
|
|
|
if err != nil {
|
|
|
|
feed.SiteURL = siteURL
|
|
|
|
}
|
|
|
|
|
|
|
|
feed.Title = a.Title.String()
|
2019-12-23 01:18:21 -05:00
|
|
|
if feed.Title == "" {
|
|
|
|
feed.Title = feed.SiteURL
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, entry := range a.Entries {
|
|
|
|
item := entry.Transform()
|
|
|
|
entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL)
|
|
|
|
if err == nil {
|
|
|
|
item.URL = entryURL
|
|
|
|
}
|
|
|
|
|
|
|
|
if item.Author == "" {
|
|
|
|
item.Author = a.Author.String()
|
|
|
|
}
|
|
|
|
|
2022-03-04 19:49:44 -05:00
|
|
|
if item.Title == "" {
|
|
|
|
item.Title = sanitizer.TruncateHTML(item.Content, 100)
|
|
|
|
}
|
|
|
|
|
2019-12-23 01:18:21 -05:00
|
|
|
if item.Title == "" {
|
|
|
|
item.Title = item.URL
|
|
|
|
}
|
|
|
|
|
|
|
|
feed.Entries = append(feed.Entries, item)
|
|
|
|
}
|
|
|
|
|
|
|
|
return feed
|
|
|
|
}
|
|
|
|
|
|
|
|
type atom03Entry struct {
|
|
|
|
ID string `xml:"id"`
|
|
|
|
Title atom03Text `xml:"title"`
|
|
|
|
Modified string `xml:"modified"`
|
|
|
|
Issued string `xml:"issued"`
|
|
|
|
Created string `xml:"created"`
|
|
|
|
Links atomLinks `xml:"link"`
|
|
|
|
Summary atom03Text `xml:"summary"`
|
|
|
|
Content atom03Text `xml:"content"`
|
|
|
|
Author atomPerson `xml:"author"`
|
|
|
|
}
|
|
|
|
|
|
|
|
func (a *atom03Entry) Transform() *model.Entry {
|
|
|
|
entry := new(model.Entry)
|
|
|
|
entry.URL = a.Links.originalLink()
|
|
|
|
entry.Date = a.entryDate()
|
|
|
|
entry.Author = a.Author.String()
|
|
|
|
entry.Hash = a.entryHash()
|
|
|
|
entry.Content = a.entryContent()
|
|
|
|
entry.Title = a.entryTitle()
|
|
|
|
return entry
|
|
|
|
}
|
|
|
|
|
|
|
|
func (a *atom03Entry) entryTitle() string {
|
|
|
|
return sanitizer.StripTags(a.Title.String())
|
|
|
|
}
|
|
|
|
|
|
|
|
func (a *atom03Entry) entryContent() string {
|
|
|
|
content := a.Content.String()
|
|
|
|
if content != "" {
|
|
|
|
return content
|
|
|
|
}
|
|
|
|
|
|
|
|
summary := a.Summary.String()
|
|
|
|
if summary != "" {
|
|
|
|
return summary
|
|
|
|
}
|
|
|
|
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
func (a *atom03Entry) entryDate() time.Time {
|
|
|
|
dateText := ""
|
|
|
|
for _, value := range []string{a.Issued, a.Modified, a.Created} {
|
|
|
|
if value != "" {
|
|
|
|
dateText = value
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if dateText != "" {
|
|
|
|
result, err := date.Parse(dateText)
|
|
|
|
if err != nil {
|
|
|
|
logger.Error("atom: %v", err)
|
|
|
|
return time.Now()
|
|
|
|
}
|
|
|
|
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
|
|
|
return time.Now()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (a *atom03Entry) entryHash() string {
|
|
|
|
for _, value := range []string{a.ID, a.Links.originalLink()} {
|
|
|
|
if value != "" {
|
|
|
|
return crypto.Hash(value)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
// atom03Text is the XML decoding target for the text-bearing elements of an
// Atom 0.3 document (it is used for the title, summary and content elements).
type atom03Text struct {
	// Type is the "type" attribute; String leaves the content unescaped only
	// when it equals "text/html".
	Type string `xml:"type,attr"`

	// Mode is the "mode" attribute; String recognises "xml", "escaped" and
	// "base64" and treats anything else like "escaped".
	Mode string `xml:"mode,attr"`

	// CharData is the character data of the element.
	CharData string `xml:",chardata"`

	// InnerXML is the raw XML nested inside the element.
	InnerXML string `xml:",innerxml"`
}

// String renders the element as a string according to its mode and type.
//
//   - mode "xml": the raw inner XML is used.
//   - mode "base64": the character data is base64-decoded; if decoding
//     fails the content is empty.
//   - any other mode (including "escaped" and no mode): the character data
//     is used as is.
//
// Unless the type is "text/html", the result is HTML-escaped. Surrounding
// whitespace is always trimmed.
func (a *atom03Text) String() string {
	var content string

	switch a.Mode {
	case "xml":
		content = a.InnerXML
	case "base64":
		// The standard decoder only ignores '\r' and '\n'. Payloads that are
		// indented or wrapped with spaces/tabs inside the XML element would
		// otherwise fail to decode and be silently dropped, so all
		// whitespace is removed first.
		encoded := strings.Join(strings.Fields(a.CharData), "")
		if decoded, err := base64.StdEncoding.DecodeString(encoded); err == nil {
			content = string(decoded)
		}
	default:
		// Covers "escaped" as well as a missing or unknown mode.
		content = a.CharData
	}

	if a.Type != "text/html" {
		content = html.EscapeString(content)
	}

	return strings.TrimSpace(content)
}
|