2023-06-19 17:42:47 -04:00
|
|
|
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0
|
2017-11-20 21:34:11 -05:00
|
|
|
|
2023-08-10 22:46:45 -04:00
|
|
|
package rdf // import "miniflux.app/v2/internal/reader/rdf"
|
2017-11-20 21:34:11 -05:00
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/xml"
|
2021-03-19 21:39:44 -04:00
|
|
|
"html"
|
2023-09-24 19:32:09 -04:00
|
|
|
"log/slog"
|
2017-11-22 17:52:31 -05:00
|
|
|
"strings"
|
2017-11-20 22:25:30 -05:00
|
|
|
"time"
|
2017-11-20 21:34:11 -05:00
|
|
|
|
2023-08-10 22:46:45 -04:00
|
|
|
"miniflux.app/v2/internal/crypto"
|
|
|
|
"miniflux.app/v2/internal/model"
|
|
|
|
"miniflux.app/v2/internal/reader/date"
|
2023-09-08 19:50:06 -04:00
|
|
|
"miniflux.app/v2/internal/reader/dublincore"
|
2023-08-10 22:46:45 -04:00
|
|
|
"miniflux.app/v2/internal/reader/sanitizer"
|
2023-08-13 22:09:01 -04:00
|
|
|
"miniflux.app/v2/internal/urllib"
|
2017-11-20 21:34:11 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
type rdfFeed struct {
|
|
|
|
XMLName xml.Name `xml:"RDF"`
|
|
|
|
Title string `xml:"channel>title"`
|
|
|
|
Link string `xml:"channel>link"`
|
|
|
|
Items []rdfItem `xml:"item"`
|
2023-09-08 19:50:06 -04:00
|
|
|
dublincore.DublinCoreFeedElement
|
2017-11-20 21:34:11 -05:00
|
|
|
}
|
|
|
|
|
2020-12-02 23:47:11 -05:00
|
|
|
func (r *rdfFeed) Transform(baseURL string) *model.Feed {
|
|
|
|
var err error
|
2017-11-20 21:34:11 -05:00
|
|
|
feed := new(model.Feed)
|
|
|
|
feed.Title = sanitizer.StripTags(r.Title)
|
2020-12-02 23:47:11 -05:00
|
|
|
feed.FeedURL = baseURL
|
2023-08-13 22:09:01 -04:00
|
|
|
feed.SiteURL, err = urllib.AbsoluteURL(baseURL, r.Link)
|
2020-12-02 23:47:11 -05:00
|
|
|
if err != nil {
|
|
|
|
feed.SiteURL = r.Link
|
|
|
|
}
|
2017-11-20 21:34:11 -05:00
|
|
|
|
|
|
|
for _, item := range r.Items {
|
|
|
|
entry := item.Transform()
|
2019-12-23 17:39:54 -05:00
|
|
|
if entry.Author == "" && r.DublinCoreCreator != "" {
|
2023-09-08 19:50:06 -04:00
|
|
|
entry.Author = r.GetSanitizedCreator()
|
2017-11-20 21:34:11 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
if entry.URL == "" {
|
|
|
|
entry.URL = feed.SiteURL
|
2017-12-13 23:16:15 -05:00
|
|
|
} else {
|
2023-08-13 22:09:01 -04:00
|
|
|
entryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL)
|
2017-12-13 23:16:15 -05:00
|
|
|
if err == nil {
|
|
|
|
entry.URL = entryURL
|
|
|
|
}
|
2017-11-20 21:34:11 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
feed.Entries = append(feed.Entries, entry)
|
|
|
|
}
|
|
|
|
|
|
|
|
return feed
|
|
|
|
}
|
|
|
|
|
|
|
|
type rdfItem struct {
|
|
|
|
Title string `xml:"title"`
|
|
|
|
Link string `xml:"link"`
|
|
|
|
Description string `xml:"description"`
|
2023-09-08 19:50:06 -04:00
|
|
|
dublincore.DublinCoreItemElement
|
2017-11-20 21:34:11 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
func (r *rdfItem) Transform() *model.Entry {
|
2023-09-09 01:45:17 -04:00
|
|
|
entry := model.NewEntry()
|
2019-12-23 17:39:54 -05:00
|
|
|
entry.Title = r.entryTitle()
|
|
|
|
entry.Author = r.entryAuthor()
|
|
|
|
entry.URL = r.entryURL()
|
|
|
|
entry.Content = r.entryContent()
|
|
|
|
entry.Hash = r.entryHash()
|
|
|
|
entry.Date = r.entryDate()
|
2017-11-20 21:34:11 -05:00
|
|
|
return entry
|
|
|
|
}
|
|
|
|
|
2019-12-23 17:39:54 -05:00
|
|
|
func (r *rdfItem) entryTitle() string {
|
2021-03-19 21:39:44 -04:00
|
|
|
return html.UnescapeString(strings.TrimSpace(r.Title))
|
2019-12-23 17:39:54 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
func (r *rdfItem) entryContent() string {
|
|
|
|
switch {
|
|
|
|
case r.DublinCoreContent != "":
|
|
|
|
return r.DublinCoreContent
|
|
|
|
default:
|
|
|
|
return r.Description
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *rdfItem) entryAuthor() string {
|
2023-09-08 19:50:06 -04:00
|
|
|
return r.GetSanitizedCreator()
|
2019-12-23 17:39:54 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
func (r *rdfItem) entryURL() string {
|
|
|
|
return strings.TrimSpace(r.Link)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *rdfItem) entryDate() time.Time {
|
|
|
|
if r.DublinCoreDate != "" {
|
|
|
|
result, err := date.Parse(r.DublinCoreDate)
|
2018-04-10 21:13:05 -04:00
|
|
|
if err != nil {
|
2023-11-01 15:26:16 -04:00
|
|
|
slog.Debug("Unable to parse date from RDF feed",
|
2023-09-24 19:32:09 -04:00
|
|
|
slog.String("date", r.DublinCoreDate),
|
|
|
|
slog.String("link", r.Link),
|
|
|
|
slog.Any("error", err),
|
|
|
|
)
|
2018-04-10 21:13:05 -04:00
|
|
|
return time.Now()
|
|
|
|
}
|
|
|
|
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
|
|
|
return time.Now()
|
|
|
|
}
|
|
|
|
|
2019-12-23 17:39:54 -05:00
|
|
|
func (r *rdfItem) entryHash() string {
|
2017-11-20 21:34:11 -05:00
|
|
|
value := r.Link
|
|
|
|
if value == "" {
|
|
|
|
value = r.Title + r.Description
|
|
|
|
}
|
|
|
|
|
2018-01-02 22:15:08 -05:00
|
|
|
return crypto.Hash(value)
|
2017-11-20 21:34:11 -05:00
|
|
|
}
|