Basic table removal rule
This commit is contained in:
parent
9a826bbe6f
commit
49d2596fc6
3 changed files with 43 additions and 0 deletions
|
@ -335,3 +335,34 @@ func parseMarkdown(entryContent string) string {
|
|||
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func removeTables(entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
var table *goquery.Selection
|
||||
|
||||
for {
|
||||
table = doc.Find("table").First()
|
||||
|
||||
if table.Length() == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
td := table.Find("td").First()
|
||||
|
||||
if td.Length() == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
tdHtml, _ := td.Html()
|
||||
|
||||
table.Parent().AppendHtml(tdHtml)
|
||||
table.Remove()
|
||||
}
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
|
|
@ -110,6 +110,8 @@ func applyRule(entryURL, entryContent string, rule rule) string {
|
|||
}
|
||||
case "parse_markdown":
|
||||
entryContent = parseMarkdown(entryContent)
|
||||
case "remove_tables":
|
||||
entryContent = removeTables(entryContent)
|
||||
}
|
||||
|
||||
return entryContent
|
||||
|
|
|
@ -325,3 +325,13 @@ func TestRewriteBase64DecodeArgs(t *testing.T) {
|
|||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteRemoveTables(t *testing.T) {
|
||||
content := `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td>Hello World!</td></tr></tbody></table></td></tr></tbody></table>`
|
||||
expected := `<p>Test</p>Hello World!`
|
||||
output := Rewriter("https://example.org/article", content, `remove_tables`)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue