Handle RSS author elements with inner HTML
This commit is contained in:
parent
34cdffda88
commit
6ea4da3bce
2 changed files with 30 additions and 3 deletions
|
@ -230,6 +230,31 @@ func TestParseFeedURLWithAtomLink(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithAuthorAndInnerHTML(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<author>by <a itemprop="url" class="author" rel="author" href="/author/foobar">Foo Bar</a></author>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "by Foo Bar" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithAtomAuthor(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||
|
|
|
@ -15,6 +15,7 @@ import (
|
|||
"github.com/miniflux/miniflux/logger"
|
||||
"github.com/miniflux/miniflux/model"
|
||||
"github.com/miniflux/miniflux/reader/date"
|
||||
"github.com/miniflux/miniflux/reader/sanitizer"
|
||||
"github.com/miniflux/miniflux/url"
|
||||
)
|
||||
|
||||
|
@ -56,6 +57,7 @@ type rssAuthor struct {
|
|||
XMLName xml.Name
|
||||
Data string `xml:",chardata"`
|
||||
Name string `xml:"name"`
|
||||
Inner string `xml:",innerxml"`
|
||||
}
|
||||
|
||||
type rssEnclosure struct {
|
||||
|
@ -100,7 +102,7 @@ func (r *rssFeed) Transform() *model.Feed {
|
|||
if entry.Author == "" && r.ItunesAuthor != "" {
|
||||
entry.Author = r.ItunesAuthor
|
||||
}
|
||||
entry.Author = strings.TrimSpace(entry.Author)
|
||||
entry.Author = strings.TrimSpace(sanitizer.StripTags(entry.Author))
|
||||
|
||||
if entry.URL == "" {
|
||||
entry.URL = feed.SiteURL
|
||||
|
@ -146,8 +148,8 @@ func (r *rssItem) GetAuthor() string {
|
|||
return element.Name
|
||||
}
|
||||
|
||||
if element.Data != "" {
|
||||
return element.Data
|
||||
if element.Inner != "" {
|
||||
return element.Inner
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue