More robust Atom text handling
Miniflux couldn't deal with XHTML Summary elements. - Make Summary an 'atomContent' field - Define an atomContentToString function rather than inlining it three times - Also properly escape special characters in plain text fields.
This commit is contained in:
parent
15505ee4a2
commit
0cdcec10ca
2 changed files with 103 additions and 16 deletions
|
@ -6,6 +6,7 @@ package atom // import "miniflux.app/reader/atom"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
|
"html"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
@ -33,7 +34,7 @@ type atomEntry struct {
|
||||||
Published string `xml:"published"`
|
Published string `xml:"published"`
|
||||||
Updated string `xml:"updated"`
|
Updated string `xml:"updated"`
|
||||||
Links []atomLink `xml:"link"`
|
Links []atomLink `xml:"link"`
|
||||||
Summary string `xml:"summary"`
|
Summary atomContent `xml:"summary"`
|
||||||
Content atomContent `xml:"content"`
|
Content atomContent `xml:"content"`
|
||||||
MediaGroup atomMediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
|
MediaGroup atomMediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
|
||||||
Author atomAuthor `xml:"author"`
|
Author atomAuthor `xml:"author"`
|
||||||
|
@ -147,17 +148,31 @@ func getDate(a *atomEntry) time.Time {
|
||||||
return time.Now()
|
return time.Now()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func atomContentToString(c atomContent) string {
|
||||||
|
if c.Type == "xhtml" {
|
||||||
|
return c.XML
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.Type == "html" {
|
||||||
|
return c.Data
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.Type == "text" || c.Type == "" {
|
||||||
|
return html.EscapeString(c.Data)
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
func getContent(a *atomEntry) string {
|
func getContent(a *atomEntry) string {
|
||||||
if a.Content.Type == "html" || a.Content.Type == "text" {
|
r := atomContentToString(a.Content)
|
||||||
return a.Content.Data
|
if r != "" {
|
||||||
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
if a.Content.Type == "xhtml" {
|
r = atomContentToString(a.Summary)
|
||||||
return a.Content.XML
|
if r != "" {
|
||||||
}
|
return r
|
||||||
|
|
||||||
if a.Summary != "" {
|
|
||||||
return a.Summary
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if a.MediaGroup.Description != "" {
|
if a.MediaGroup.Description != "" {
|
||||||
|
@ -168,13 +183,7 @@ func getContent(a *atomEntry) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
func getTitle(a *atomEntry) string {
|
func getTitle(a *atomEntry) string {
|
||||||
title := ""
|
title := atomContentToString(a.Title)
|
||||||
if a.Title.Type == "xhtml" {
|
|
||||||
title = a.Title.XML
|
|
||||||
} else {
|
|
||||||
title = a.Title.Data
|
|
||||||
}
|
|
||||||
|
|
||||||
return strings.TrimSpace(sanitizer.StripTags(title))
|
return strings.TrimSpace(sanitizer.StripTags(title))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -282,6 +282,84 @@ func TestParseEntryTitleWithXHTML(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseEntrySummaryWithXHTML(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link href="http://example.org/"/>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<title type="xhtml"><code>Test</code> Test</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<summary type="xhtml"><p>Some text.</p></summary>
|
||||||
|
</entry>
|
||||||
|
|
||||||
|
</feed>`
|
||||||
|
|
||||||
|
feed, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Content != "<p>Some text.</p>" {
|
||||||
|
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseEntrySummaryWithHTML(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link href="http://example.org/"/>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<title type="html"><code>Test</code> Test</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<summary type="html"><![CDATA[<p>Some text.</p>]]></summary>
|
||||||
|
</entry>
|
||||||
|
|
||||||
|
</feed>`
|
||||||
|
|
||||||
|
feed, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Content != "<p>Some text.</p>" {
|
||||||
|
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseEntrySummaryWithPlainText(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link href="http://example.org/"/>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<title type="html"><code>Test</code> Test</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<summary type="text"><![CDATA[<Some text.>]]></summary>
|
||||||
|
</entry>
|
||||||
|
|
||||||
|
</feed>`
|
||||||
|
|
||||||
|
feed, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Content != "<Some text.>" {
|
||||||
|
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseEntryWithAuthorName(t *testing.T) {
|
func TestParseEntryWithAuthorName(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
|
Loading…
Reference in a new issue