Handle RSS author elements with inner HTML
This commit is contained in:
parent
34cdffda88
commit
6ea4da3bce
2 changed files with 30 additions and 3 deletions
|
@ -230,6 +230,31 @@ func TestParseFeedURLWithAtomLink(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseEntryWithAuthorAndInnerHTML(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||||
|
<channel>
|
||||||
|
<title>Example</title>
|
||||||
|
<link>https://example.org/</link>
|
||||||
|
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
||||||
|
<item>
|
||||||
|
<title>Test</title>
|
||||||
|
<link>https://example.org/item</link>
|
||||||
|
<author>by <a itemprop="url" class="author" rel="author" href="/author/foobar">Foo Bar</a></author>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>`
|
||||||
|
|
||||||
|
feed, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Author != "by Foo Bar" {
|
||||||
|
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseEntryWithAtomAuthor(t *testing.T) {
|
func TestParseEntryWithAtomAuthor(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||||
|
|
|
@ -15,6 +15,7 @@ import (
|
||||||
"github.com/miniflux/miniflux/logger"
|
"github.com/miniflux/miniflux/logger"
|
||||||
"github.com/miniflux/miniflux/model"
|
"github.com/miniflux/miniflux/model"
|
||||||
"github.com/miniflux/miniflux/reader/date"
|
"github.com/miniflux/miniflux/reader/date"
|
||||||
|
"github.com/miniflux/miniflux/reader/sanitizer"
|
||||||
"github.com/miniflux/miniflux/url"
|
"github.com/miniflux/miniflux/url"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -56,6 +57,7 @@ type rssAuthor struct {
|
||||||
XMLName xml.Name
|
XMLName xml.Name
|
||||||
Data string `xml:",chardata"`
|
Data string `xml:",chardata"`
|
||||||
Name string `xml:"name"`
|
Name string `xml:"name"`
|
||||||
|
Inner string `xml:",innerxml"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type rssEnclosure struct {
|
type rssEnclosure struct {
|
||||||
|
@ -100,7 +102,7 @@ func (r *rssFeed) Transform() *model.Feed {
|
||||||
if entry.Author == "" && r.ItunesAuthor != "" {
|
if entry.Author == "" && r.ItunesAuthor != "" {
|
||||||
entry.Author = r.ItunesAuthor
|
entry.Author = r.ItunesAuthor
|
||||||
}
|
}
|
||||||
entry.Author = strings.TrimSpace(entry.Author)
|
entry.Author = strings.TrimSpace(sanitizer.StripTags(entry.Author))
|
||||||
|
|
||||||
if entry.URL == "" {
|
if entry.URL == "" {
|
||||||
entry.URL = feed.SiteURL
|
entry.URL = feed.SiteURL
|
||||||
|
@ -146,8 +148,8 @@ func (r *rssItem) GetAuthor() string {
|
||||||
return element.Name
|
return element.Name
|
||||||
}
|
}
|
||||||
|
|
||||||
if element.Data != "" {
|
if element.Inner != "" {
|
||||||
return element.Data
|
return element.Inner
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue