Improve Dublin Core support for RDF feeds
This commit is contained in:
parent
1b33bb3d1c
commit
200b1c304b
3 changed files with 86 additions and 15 deletions
17
reader/rdf/dublincore.go
Normal file
17
reader/rdf/dublincore.go
Normal file
|
@ -0,0 +1,17 @@
|
|||
// Copyright 2019 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rdf // import "miniflux.app/reader/rdf"
|
||||
|
||||
// DublinCoreFeedElement holds the feed-level Dublin Core XML elements.
//
// The struct carries only XML decoding tags, so it can be embedded in a feed
// struct to expose DublinCoreCreator on it.
type DublinCoreFeedElement struct {
	// DublinCoreCreator is the text of the creator element nested under
	// channel, matched in the http://purl.org/dc/elements/1.1/ namespace.
	//
	// NOTE(review): encoding/xml seems to apply the tag namespace to every
	// step of an a>b path; confirm this still matches when <channel> itself
	// lives in the RSS 1.0 namespace.
	DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"`
}
|
||||
|
||||
// DublinCoreEntryElement holds the entry-level XML elements decoded from
// feed extension namespaces.
//
// The struct carries only XML decoding tags, so it can be embedded in an item
// struct to expose these fields on it.
type DublinCoreEntryElement struct {
	// DublinCoreDate is the raw text of the Dublin Core date element.
	DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`

	// DublinCoreCreator is the raw text of the Dublin Core creator element.
	DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`

	// DublinCoreContent is the raw text of the encoded element. Despite the
	// field name, its tag uses the RSS 1.0 content module namespace rather
	// than the Dublin Core one.
	DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
}
|
|
@ -486,3 +486,36 @@ func TestParseFeedWithURLWrappedInSpaces(t *testing.T) {
|
|||
t.Errorf(`Unexpected entry URL, got %q`, feed.Entries[0].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRDFWithContentEncoded(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/">
|
||||
<channel>
|
||||
<title>Example Feed</title>
|
||||
<link>http://example.org/</link>
|
||||
</channel>
|
||||
<item>
|
||||
<title>Item Title</title>
|
||||
<link>http://example.org/</link>
|
||||
<content:encoded><![CDATA[<p>Test</p>]]></content:encoded>
|
||||
</item>
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
|
||||
}
|
||||
|
||||
expected := `<p>Test</p>`
|
||||
result := feed.Entries[0].Content
|
||||
if result != expected {
|
||||
t.Errorf(`Unexpected entry URL, got %q instead of %q`, result, expected)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,8 +21,8 @@ type rdfFeed struct {
|
|||
XMLName xml.Name `xml:"RDF"`
|
||||
Title string `xml:"channel>title"`
|
||||
Link string `xml:"channel>link"`
|
||||
Creator string `xml:"channel>creator"`
|
||||
Items []rdfItem `xml:"item"`
|
||||
DublinCoreFeedElement
|
||||
}
|
||||
|
||||
func (r *rdfFeed) Transform() *model.Feed {
|
||||
|
@ -32,9 +32,10 @@ func (r *rdfFeed) Transform() *model.Feed {
|
|||
|
||||
for _, item := range r.Items {
|
||||
entry := item.Transform()
|
||||
if entry.Author == "" && r.Creator != "" {
|
||||
entry.Author = sanitizer.StripTags(r.Creator)
|
||||
if entry.Author == "" && r.DublinCoreCreator != "" {
|
||||
entry.Author = strings.TrimSpace(r.DublinCoreCreator)
|
||||
}
|
||||
entry.Author = sanitizer.StripTags(entry.Author)
|
||||
|
||||
if entry.URL == "" {
|
||||
entry.URL = feed.SiteURL
|
||||
|
@ -55,24 +56,44 @@ type rdfItem struct {
|
|||
Title string `xml:"title"`
|
||||
Link string `xml:"link"`
|
||||
Description string `xml:"description"`
|
||||
Creator string `xml:"creator"`
|
||||
Date string `xml:"date"`
|
||||
DublinCoreEntryElement
|
||||
}
|
||||
|
||||
func (r *rdfItem) Transform() *model.Entry {
|
||||
entry := new(model.Entry)
|
||||
entry.Title = strings.TrimSpace(r.Title)
|
||||
entry.Author = strings.TrimSpace(r.Creator)
|
||||
entry.URL = strings.TrimSpace(r.Link)
|
||||
entry.Content = r.Description
|
||||
entry.Hash = getHash(r)
|
||||
entry.Date = getDate(r)
|
||||
entry.Title = r.entryTitle()
|
||||
entry.Author = r.entryAuthor()
|
||||
entry.URL = r.entryURL()
|
||||
entry.Content = r.entryContent()
|
||||
entry.Hash = r.entryHash()
|
||||
entry.Date = r.entryDate()
|
||||
return entry
|
||||
}
|
||||
|
||||
func getDate(r *rdfItem) time.Time {
|
||||
if r.Date != "" {
|
||||
result, err := date.Parse(r.Date)
|
||||
func (r *rdfItem) entryTitle() string {
|
||||
return strings.TrimSpace(r.Title)
|
||||
}
|
||||
|
||||
func (r *rdfItem) entryContent() string {
|
||||
switch {
|
||||
case r.DublinCoreContent != "":
|
||||
return r.DublinCoreContent
|
||||
default:
|
||||
return r.Description
|
||||
}
|
||||
}
|
||||
|
||||
func (r *rdfItem) entryAuthor() string {
|
||||
return strings.TrimSpace(r.DublinCoreCreator)
|
||||
}
|
||||
|
||||
func (r *rdfItem) entryURL() string {
|
||||
return strings.TrimSpace(r.Link)
|
||||
}
|
||||
|
||||
func (r *rdfItem) entryDate() time.Time {
|
||||
if r.DublinCoreDate != "" {
|
||||
result, err := date.Parse(r.DublinCoreDate)
|
||||
if err != nil {
|
||||
logger.Error("rdf: %v", err)
|
||||
return time.Now()
|
||||
|
@ -84,7 +105,7 @@ func getDate(r *rdfItem) time.Time {
|
|||
return time.Now()
|
||||
}
|
||||
|
||||
func getHash(r *rdfItem) string {
|
||||
func (r *rdfItem) entryHash() string {
|
||||
value := r.Link
|
||||
if value == "" {
|
||||
value = r.Title + r.Description
|
||||
|
|
Loading…
Reference in a new issue