Use truncated entry description as title if unavailable
This commit is contained in:
parent
c9e0f0b3e4
commit
1eb01b39e7
10 changed files with 314 additions and 24 deletions
|
@ -60,6 +60,10 @@ func (a *atom03Feed) Transform(baseURL string) *model.Feed {
|
|||
item.Author = a.Author.String()
|
||||
}
|
||||
|
||||
if item.Title == "" {
|
||||
item.Title = sanitizer.TruncateHTML(item.Content, 100)
|
||||
}
|
||||
|
||||
if item.Title == "" {
|
||||
item.Title = item.URL
|
||||
}
|
||||
|
|
|
@ -98,7 +98,7 @@ func TestParseAtom03WithoutFeedTitle(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseAtom03WithoutEntryTitle(t *testing.T) {
|
||||
func TestParseAtom03WithoutEntryTitleButWithLink(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
<title>dive into mark</title>
|
||||
|
@ -125,6 +125,62 @@ func TestParseAtom03WithoutEntryTitle(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseAtom03WithoutEntryTitleButWithSummary(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
<title>dive into mark</title>
|
||||
<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
|
||||
<modified>2003-12-13T18:30:02Z</modified>
|
||||
<author><name>Mark Pilgrim</name></author>
|
||||
<entry>
|
||||
<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
|
||||
<id>tag:diveintomark.org,2003:3.2397</id>
|
||||
<summary type="text/plain">It's a test</summary>
|
||||
</entry>
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse("http://diveintomark.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "It's a test" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAtom03WithoutEntryTitleButWithXMLContent(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
<title>dive into mark</title>
|
||||
<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
|
||||
<modified>2003-12-13T18:30:02Z</modified>
|
||||
<author><name>Mark Pilgrim</name></author>
|
||||
<entry>
|
||||
<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
|
||||
<id>tag:diveintomark.org,2003:3.2397</id>
|
||||
<content mode="xml" type="text/html"><p>Some text.</p></content>
|
||||
</entry>
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse("http://diveintomark.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "Some text." {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAtom03WithSummaryOnly(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
|
|
|
@ -16,6 +16,7 @@ import (
|
|||
"miniflux.app/model"
|
||||
"miniflux.app/reader/date"
|
||||
"miniflux.app/reader/media"
|
||||
"miniflux.app/reader/sanitizer"
|
||||
"miniflux.app/url"
|
||||
)
|
||||
|
||||
|
@ -64,6 +65,10 @@ func (a *atom10Feed) Transform(baseURL string) *model.Feed {
|
|||
item.Author = a.Authors.String()
|
||||
}
|
||||
|
||||
if item.Title == "" {
|
||||
item.Title = sanitizer.TruncateHTML(item.Content, 100)
|
||||
}
|
||||
|
||||
if item.Title == "" {
|
||||
item.Title = item.URL
|
||||
}
|
||||
|
|
|
@ -100,7 +100,37 @@ func TestParseFeedWithoutTitle(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutTitle(t *testing.T) {
|
||||
func TestParseEntryWithoutTitleButWithURL(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "http://example.org/2003/12/13/atom03" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutTitleButWithSummary(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
|
@ -126,7 +156,40 @@ func TestParseEntryWithoutTitle(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "http://example.org/2003/12/13/atom03" {
|
||||
if feed.Entries[0].Title != "Some text." {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutTitleButWithXHTMLContent(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<content type="xhtml">
|
||||
<div xmlns="http://www.w3.org/1999/xhtml">AT&T bought <b>by SBC</b>!</div>
|
||||
</content>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "AT&T bought by SBC!" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
"miniflux.app/logger"
|
||||
"miniflux.app/model"
|
||||
"miniflux.app/reader/date"
|
||||
"miniflux.app/reader/sanitizer"
|
||||
"miniflux.app/url"
|
||||
)
|
||||
|
||||
|
@ -130,9 +131,13 @@ func (j *jsonItem) GetHash() string {
|
|||
}
|
||||
|
||||
func (j *jsonItem) GetTitle() string {
|
||||
for _, value := range []string{j.Title, j.Summary, j.Text, j.URL} {
|
||||
if j.Title != "" {
|
||||
return j.Title
|
||||
}
|
||||
|
||||
for _, value := range []string{j.Summary, j.Text, j.HTML} {
|
||||
if value != "" {
|
||||
return truncate(value)
|
||||
return sanitizer.TruncateHTML(value, 100)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -186,16 +191,3 @@ func getAuthor(author jsonAuthor) string {
|
|||
|
||||
return ""
|
||||
}
|
||||
|
||||
// truncate strips leading and trailing whitespace from str and caps the
// result at 100 runes, appending an ellipsis ("…") when anything was cut off.
func truncate(str string) string {
	const limit = 100

	trimmed := strings.TrimSpace(str)

	// Measure and slice in runes so a multi-byte character is never split.
	chars := []rune(trimmed)
	if len(chars) <= limit {
		return trimmed
	}

	return string(chars[:limit]) + "…"
}
|
||||
|
|
|
@ -76,7 +76,7 @@ func TestParseJsonFeed(t *testing.T) {
|
|||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[1].Title != "https://example.org/initial-post" {
|
||||
if feed.Entries[1].Title != "Hello, world!" {
|
||||
t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[1].Title)
|
||||
}
|
||||
|
||||
|
@ -398,7 +398,7 @@ func TestParseFeedItemWithoutID(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutTitle(t *testing.T) {
|
||||
func TestParseFeedItemWithoutTitleButWithURL(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
|
@ -425,7 +425,7 @@ func TestParseFeedItemWithoutTitle(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseTruncateItemTitle(t *testing.T) {
|
||||
func TestParseFeedItemWithoutTitleButWithSummary(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
|
@ -433,7 +433,61 @@ func TestParseTruncateItemTitle(t *testing.T) {
|
|||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"title": "` + strings.Repeat("a", 200) + `"
|
||||
"summary": "This is some text content."
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "This is some text content." {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutTitleButWithHTMLContent(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"content_html": "This is <strong>HTML</strong>."
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "This is HTML." {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutTitleButWithTextContent(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"content_text": "` + strings.Repeat("a", 200) + `"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
@ -448,7 +502,7 @@ func TestParseTruncateItemTitle(t *testing.T) {
|
|||
}
|
||||
|
||||
if len(feed.Entries[0].Title) != 103 {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
t.Errorf("Incorrect entry title, got: %d", len(feed.Entries[0].Title))
|
||||
}
|
||||
|
||||
if len([]rune(feed.Entries[0].Title)) != 101 {
|
||||
|
|
|
@ -115,7 +115,7 @@ func TestParseFeedWithoutTitle(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutTitle(t *testing.T) {
|
||||
func TestParseEntryWithoutTitleAndDescription(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
|
@ -136,6 +136,30 @@ func TestParseEntryWithoutTitle(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutTitleButWithDescription(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<link>https://example.org/item</link>
|
||||
<description>
|
||||
This is the description
|
||||
</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "This is the description" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithMediaTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
|
||||
|
|
|
@ -73,6 +73,10 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed {
|
|||
}
|
||||
}
|
||||
|
||||
if entry.Title == "" {
|
||||
entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
|
||||
}
|
||||
|
||||
if entry.Title == "" {
|
||||
entry.Title = entry.URL
|
||||
}
|
||||
|
|
23
reader/sanitizer/truncate.go
Normal file
23
reader/sanitizer/truncate.go
Normal file
|
@ -0,0 +1,23 @@
|
|||
// Copyright 2022 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package sanitizer
|
||||
|
||||
import "strings"
|
||||
|
||||
func TruncateHTML(input string, max int) string {
|
||||
text := StripTags(input)
|
||||
text = strings.ReplaceAll(text, "\n", " ")
|
||||
text = strings.ReplaceAll(text, "\t", " ")
|
||||
text = strings.ReplaceAll(text, " ", " ")
|
||||
text = strings.TrimSpace(text)
|
||||
|
||||
// Convert to runes to be safe with unicode
|
||||
runes := []rune(text)
|
||||
if len(runes) > max {
|
||||
return strings.TrimSpace(string(runes[:max])) + "…"
|
||||
}
|
||||
|
||||
return text
|
||||
}
|
65
reader/sanitizer/truncate_test.go
Normal file
65
reader/sanitizer/truncate_test.go
Normal file
|
@ -0,0 +1,65 @@
|
|||
// Copyright 2022 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package sanitizer
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTruncateHTMWithTextLowerThanLimitL(t *testing.T) {
|
||||
input := `This is a <strong>bug 🐛</strong>.`
|
||||
expected := `This is a bug 🐛.`
|
||||
output := TruncateHTML(input, 50)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: %q != %q`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateHTMLWithTextAboveLimit(t *testing.T) {
|
||||
input := `This is <strong>HTML</strong>.`
|
||||
expected := `This…`
|
||||
output := TruncateHTML(input, 4)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: %q != %q`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateHTMLWithUnicodeTextAboveLimit(t *testing.T) {
|
||||
input := `This is a <strong>bike 🚲</strong>.`
|
||||
expected := `This…`
|
||||
output := TruncateHTML(input, 4)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: %q != %q`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateHTMLWithMultilineTextAboveLimit(t *testing.T) {
|
||||
input := `
|
||||
This is a <strong>bike
|
||||
🚲</strong>.
|
||||
|
||||
`
|
||||
expected := `This is a bike…`
|
||||
output := TruncateHTML(input, 15)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: %q != %q`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateHTMLWithMultilineTextLowerThanLimit(t *testing.T) {
|
||||
input := `
|
||||
This is a <strong>bike
|
||||
🚲</strong>.
|
||||
|
||||
`
|
||||
expected := `This is a bike 🚲.`
|
||||
output := TruncateHTML(input, 20)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: %q != %q`, expected, output)
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue