Improve HTML sanitizer
This commit is contained in:
		
							parent
							
								
									1f015d5dfe
								
							
						
					
					
						commit
						bd663b43a0
					
				
					 4 changed files with 28 additions and 7 deletions
				
			
		|  | @ -148,7 +148,7 @@ func TestParsePodcast(t *testing.T) { | |||
| 		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title) | ||||
| 	} | ||||
| 
 | ||||
| 	if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` { | ||||
| 	if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` { | ||||
| 		t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content) | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -94,7 +94,7 @@ func TestParseRss2Sample(t *testing.T) { | |||
| 		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) | ||||
| 	} | ||||
| 
 | ||||
| 	if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` { | ||||
| 	if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` { | ||||
| 		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) | ||||
| 	} | ||||
| } | ||||
|  |  | |||
|  | @ -7,10 +7,11 @@ package sanitizer | |||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"github.com/miniflux/miniflux2/reader/url" | ||||
| 	"io" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/miniflux/miniflux2/reader/url" | ||||
| 
 | ||||
| 	"golang.org/x/net/html" | ||||
| ) | ||||
| 
 | ||||
|  | @ -33,7 +34,7 @@ func Sanitize(baseURL, input string) string { | |||
| 		token := tokenizer.Token() | ||||
| 		switch token.Type { | ||||
| 		case html.TextToken: | ||||
| 			buffer.WriteString(token.Data) | ||||
| 			buffer.WriteString(html.EscapeString(token.Data)) | ||||
| 		case html.StartTagToken: | ||||
| 			tagName := token.DataAtom.String() | ||||
| 
 | ||||
|  | @ -72,8 +73,8 @@ func Sanitize(baseURL, input string) string { | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) (attrNames []string, html string) { | ||||
| 	var htmlAttrs []string | ||||
| func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([]string, string) { | ||||
| 	var htmlAttrs, attrNames []string | ||||
| 	var err error | ||||
| 
 | ||||
| 	for _, attribute := range attributes { | ||||
|  | @ -99,7 +100,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) (a | |||
| 		} | ||||
| 
 | ||||
| 		attrNames = append(attrNames, attribute.Key) | ||||
| 		htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, value)) | ||||
| 		htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value))) | ||||
| 	} | ||||
| 
 | ||||
| 	extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName) | ||||
|  |  | |||
|  | @ -142,3 +142,23 @@ func TestPixelTracker(t *testing.T) { | |||
| 		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestXmlEntities(t *testing.T) { | ||||
| 	input := `<pre>echo "test" > /etc/hosts</pre>` | ||||
| 	expected := `<pre>echo "test" > /etc/hosts</pre>` | ||||
| 	output := Sanitize("http://example.org/", input) | ||||
| 
 | ||||
| 	if expected != output { | ||||
| 		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestEspaceAttributes(t *testing.T) { | ||||
| 	input := `<td rowspan="<b>test</b>">test</td>` | ||||
| 	expected := `<td rowspan="<b>test</b>">test</td>` | ||||
| 	output := Sanitize("http://example.org/", input) | ||||
| 
 | ||||
| 	if expected != output { | ||||
| 		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) | ||||
| 	} | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Frédéric Guillot
						Frédéric Guillot