Basic table removal rule
This commit is contained in:
parent
9a826bbe6f
commit
49d2596fc6
3 changed files with 43 additions and 0 deletions
|
@ -335,3 +335,34 @@ func parseMarkdown(entryContent string) string {
|
||||||
|
|
||||||
return sb.String()
|
return sb.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func removeTables(entryContent string) string {
|
||||||
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||||
|
if err != nil {
|
||||||
|
return entryContent
|
||||||
|
}
|
||||||
|
|
||||||
|
var table *goquery.Selection
|
||||||
|
|
||||||
|
for {
|
||||||
|
table = doc.Find("table").First()
|
||||||
|
|
||||||
|
if table.Length() == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
td := table.Find("td").First()
|
||||||
|
|
||||||
|
if td.Length() == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
tdHtml, _ := td.Html()
|
||||||
|
|
||||||
|
table.Parent().AppendHtml(tdHtml)
|
||||||
|
table.Remove()
|
||||||
|
}
|
||||||
|
|
||||||
|
output, _ := doc.Find("body").First().Html()
|
||||||
|
return output
|
||||||
|
}
|
||||||
|
|
|
@ -110,6 +110,8 @@ func applyRule(entryURL, entryContent string, rule rule) string {
|
||||||
}
|
}
|
||||||
case "parse_markdown":
|
case "parse_markdown":
|
||||||
entryContent = parseMarkdown(entryContent)
|
entryContent = parseMarkdown(entryContent)
|
||||||
|
case "remove_tables":
|
||||||
|
entryContent = removeTables(entryContent)
|
||||||
}
|
}
|
||||||
|
|
||||||
return entryContent
|
return entryContent
|
||||||
|
|
|
@ -325,3 +325,13 @@ func TestRewriteBase64DecodeArgs(t *testing.T) {
|
||||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRewriteRemoveTables(t *testing.T) {
|
||||||
|
content := `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td>Hello World!</td></tr></tbody></table></td></tr></tbody></table>`
|
||||||
|
expected := `<p>Test</p>Hello World!`
|
||||||
|
output := Rewriter("https://example.org/article", content, `remove_tables`)
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue