Improve XML decoder to remove illegal characters
This commit is contained in:
parent
7409bba0d8
commit
2eb2441f2b
7 changed files with 85 additions and 19 deletions
|
@ -2624,6 +2624,7 @@ var translations = map[string]string{
|
||||||
"Unable to parse Atom feed: %q": "无法解析Atom源: %q",
|
"Unable to parse Atom feed: %q": "无法解析Atom源: %q",
|
||||||
"Unable to parse JSON feed: %q": "无法解析JSON源: %q",
|
"Unable to parse JSON feed: %q": "无法解析JSON源: %q",
|
||||||
"Unable to parse RDF feed: %q": "无法解析RDF源: %q",
|
"Unable to parse RDF feed: %q": "无法解析RDF源: %q",
|
||||||
|
"Unable to read data: %q": "无法读取数据: %q",
|
||||||
"Unable to normalize encoding: %q": "无法正则化编码: %q",
|
"Unable to normalize encoding: %q": "无法正则化编码: %q",
|
||||||
"Category not found for this user": "未找到该用户的这一分类",
|
"Category not found for this user": "未找到该用户的这一分类",
|
||||||
"This feed is empty": "该源是空的",
|
"This feed is empty": "该源是空的",
|
||||||
|
@ -2645,5 +2646,5 @@ var translationsChecksums = map[string]string{
|
||||||
"nl_NL": "a91e2195ac0731a3788405a51c4201e1a89dcce35ef792356e8c17adb57aee97",
|
"nl_NL": "a91e2195ac0731a3788405a51c4201e1a89dcce35ef792356e8c17adb57aee97",
|
||||||
"pl_PL": "097bc9beac12f33d3a5e5ee98ccba0875e4d1c1bf13e38251a66ac450834c5b3",
|
"pl_PL": "097bc9beac12f33d3a5e5ee98ccba0875e4d1c1bf13e38251a66ac450834c5b3",
|
||||||
"ru_RU": "b253bf709a2f4bcac2f894bd1797247481fa7c6b70a0a0d8785d8680be83bac8",
|
"ru_RU": "b253bf709a2f4bcac2f894bd1797247481fa7c6b70a0a0d8785d8680be83bac8",
|
||||||
"zh_CN": "cb974ad8c374278057db4ca58ff0e59314dc191e2ea59af0d1472a438a9ce3e0",
|
"zh_CN": "5004e07fa535ea56e7fbe1501bb8ff4191d1d214e51b4590110b660994c39f0d",
|
||||||
}
|
}
|
||||||
|
|
|
@ -279,6 +279,7 @@
|
||||||
"Unable to parse Atom feed: %q": "无法解析Atom源: %q",
|
"Unable to parse Atom feed: %q": "无法解析Atom源: %q",
|
||||||
"Unable to parse JSON feed: %q": "无法解析JSON源: %q",
|
"Unable to parse JSON feed: %q": "无法解析JSON源: %q",
|
||||||
"Unable to parse RDF feed: %q": "无法解析RDF源: %q",
|
"Unable to parse RDF feed: %q": "无法解析RDF源: %q",
|
||||||
|
"Unable to read data: %q": "无法读取数据: %q",
|
||||||
"Unable to normalize encoding: %q": "无法正则化编码: %q",
|
"Unable to normalize encoding: %q": "无法正则化编码: %q",
|
||||||
"Category not found for this user": "未找到该用户的这一分类",
|
"Category not found for this user": "未找到该用户的这一分类",
|
||||||
"This feed is empty": "该源是空的",
|
"This feed is empty": "该源是空的",
|
||||||
|
|
|
@ -5,22 +5,17 @@
|
||||||
package atom // import "miniflux.app/reader/atom"
|
package atom // import "miniflux.app/reader/atom"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/xml"
|
|
||||||
"io"
|
"io"
|
||||||
|
|
||||||
"miniflux.app/errors"
|
"miniflux.app/errors"
|
||||||
"miniflux.app/model"
|
"miniflux.app/model"
|
||||||
"miniflux.app/reader/encoding"
|
"miniflux.app/reader/xml"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Parse returns a normalized feed struct from a Atom feed.
|
// Parse returns a normalized feed struct from a Atom feed.
|
||||||
func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
||||||
atomFeed := new(atomFeed)
|
atomFeed := new(atomFeed)
|
||||||
decoder := xml.NewDecoder(data)
|
decoder := xml.NewDecoder(data)
|
||||||
decoder.Entity = xml.HTMLEntity
|
|
||||||
decoder.Strict = false
|
|
||||||
decoder.CharsetReader = encoding.CharsetReader
|
|
||||||
|
|
||||||
err := decoder.Decode(atomFeed)
|
err := decoder.Decode(atomFeed)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.NewLocalizedError("Unable to parse Atom feed: %q", err)
|
return nil, errors.NewLocalizedError("Unable to parse Atom feed: %q", err)
|
||||||
|
|
|
@ -5,22 +5,17 @@
|
||||||
package rdf // import "miniflux.app/reader/rdf"
|
package rdf // import "miniflux.app/reader/rdf"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/xml"
|
|
||||||
"io"
|
"io"
|
||||||
|
|
||||||
"miniflux.app/errors"
|
"miniflux.app/errors"
|
||||||
"miniflux.app/model"
|
"miniflux.app/model"
|
||||||
"miniflux.app/reader/encoding"
|
"miniflux.app/reader/xml"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Parse returns a normalized feed struct from a RDF feed.
|
// Parse returns a normalized feed struct from a RDF feed.
|
||||||
func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
||||||
feed := new(rdfFeed)
|
feed := new(rdfFeed)
|
||||||
decoder := xml.NewDecoder(data)
|
decoder := xml.NewDecoder(data)
|
||||||
decoder.Entity = xml.HTMLEntity
|
|
||||||
decoder.Strict = false
|
|
||||||
decoder.CharsetReader = encoding.CharsetReader
|
|
||||||
|
|
||||||
err := decoder.Decode(feed)
|
err := decoder.Decode(feed)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.NewLocalizedError("Unable to parse RDF feed: %q", err)
|
return nil, errors.NewLocalizedError("Unable to parse RDF feed: %q", err)
|
||||||
|
|
|
@ -5,22 +5,17 @@
|
||||||
package rss // import "miniflux.app/reader/rss"
|
package rss // import "miniflux.app/reader/rss"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/xml"
|
|
||||||
"io"
|
"io"
|
||||||
|
|
||||||
"miniflux.app/errors"
|
"miniflux.app/errors"
|
||||||
"miniflux.app/model"
|
"miniflux.app/model"
|
||||||
"miniflux.app/reader/encoding"
|
"miniflux.app/reader/xml"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Parse returns a normalized feed struct from a RSS feed.
|
// Parse returns a normalized feed struct from a RSS feed.
|
||||||
func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
||||||
feed := new(rssFeed)
|
feed := new(rssFeed)
|
||||||
decoder := xml.NewDecoder(data)
|
decoder := xml.NewDecoder(data)
|
||||||
decoder.Entity = xml.HTMLEntity
|
|
||||||
decoder.Strict = false
|
|
||||||
decoder.CharsetReader = encoding.CharsetReader
|
|
||||||
|
|
||||||
err := decoder.Decode(feed)
|
err := decoder.Decode(feed)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.NewLocalizedError("Unable to parse RSS feed: %q", err)
|
return nil, errors.NewLocalizedError("Unable to parse RSS feed: %q", err)
|
||||||
|
|
50
reader/xml/decoder.go
Normal file
50
reader/xml/decoder.go
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
// Copyright 2019 Frédéric Guillot. All rights reserved.
|
||||||
|
// Use of this source code is governed by the Apache 2.0
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package xml // import "miniflux.app/reader/xml"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/xml"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
|
|
||||||
|
"miniflux.app/reader/encoding"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewDecoder returns a XML decoder that filters illegal characters.
|
||||||
|
func NewDecoder(data io.Reader) *xml.Decoder {
|
||||||
|
decoder := xml.NewDecoder(data)
|
||||||
|
decoder.Entity = xml.HTMLEntity
|
||||||
|
decoder.Strict = false
|
||||||
|
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
|
||||||
|
utf8Reader, err := encoding.CharsetReader(charset, input)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
rawData, err := ioutil.ReadAll(utf8Reader)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("Unable to read data: %q", err)
|
||||||
|
}
|
||||||
|
filteredBytes := bytes.Map(filterValidXMLChar, rawData)
|
||||||
|
return bytes.NewReader(filteredBytes), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return decoder
|
||||||
|
}
|
||||||
|
|
||||||
|
// filterValidXMLChar is a rune-mapping function: it returns r unchanged when
// r is a legal XML character and -1 otherwise. The accepted ranges are the
// same ones the encoding/xml package checks (this function was copied from
// there): tab, line feed, carriage return, U+0020..U+D7FF, U+E000..U+FFFD
// and U+10000..U+10FFFF.
func filterValidXMLChar(r rune) rune {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		// The only control characters XML accepts.
		return r
	case 0x20 <= r && r <= 0xD7FF:
		return r
	case 0xE000 <= r && r <= 0xFFFD:
		return r
	case 0x10000 <= r && r <= 0x10FFFF:
		return r
	default:
		return -1
	}
}
|
29
reader/xml/decoder_test.go
Normal file
29
reader/xml/decoder_test.go
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
// Copyright 2019 Frédéric Guillot. All rights reserved.
|
||||||
|
// Use of this source code is governed by the Apache 2.0
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package xml // import "miniflux.app/reader/xml"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/xml"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestIllegalCharacters(t *testing.T) {
|
||||||
|
type myxml struct {
|
||||||
|
XMLName xml.Name `xml:"rss"`
|
||||||
|
Version string `xml:"version,attr"`
|
||||||
|
Title string `xml:"title"`
|
||||||
|
}
|
||||||
|
|
||||||
|
data := fmt.Sprintf(`<?xml version="1.0" encoding="windows-1251"?><rss version="2.0"><title>%s</title></rss>`, "\x10")
|
||||||
|
var x myxml
|
||||||
|
|
||||||
|
decoder := NewDecoder(strings.NewReader(data))
|
||||||
|
err := decoder.Decode(&x)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue