Refactor TableOfContentsFilter's nested table of contents

Most of the logic is now self-contained within the `HeaderNode` class.
This commit is contained in:
Robert Speicher 2017-08-29 14:08:59 -04:00
parent 2a055c23c2
commit b43aefbd9d
2 changed files with 97 additions and 71 deletions

View File

@ -15,7 +15,6 @@ module Banzai
# `li` child elements.
class TableOfContentsFilter < HTML::Pipeline::Filter
PUNCTUATION_REGEXP = /[^\p{Word}\- ]/u
HeaderNode = Struct.new(:level, :href, :text, :children, :parent)
def call
return doc if context[:no_header_anchors]
@ -23,56 +22,34 @@ module Banzai
result[:toc] = ""
headers = Hash.new(0)
# root node of header-tree
header_root = HeaderNode.new(0, nil, nil, [], nil)
current_header = header_root
header_root = current_header = HeaderNode.new
doc.css('h1, h2, h3, h4, h5, h6').each do |node|
text = node.text
id = text.downcase
id.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation
id.tr!(' ', '-') # replace spaces with dash
id.squeeze!('-') # replace multiple dashes with one
uniq = (headers[id] > 0) ? "-#{headers[id]}" : ''
headers[id] += 1
if header_content = node.children.first
# namespace detection will be automatically handled via javascript (see issue #22781)
namespace = "user-content-"
id = node
.text
.downcase
.gsub(PUNCTUATION_REGEXP, '') # remove punctuation
.tr(' ', '-') # replace spaces with dash
.squeeze('-') # replace multiple dashes with one
uniq = headers[id] > 0 ? "-#{headers[id]}" : ''
headers[id] += 1
href = "#{id}#{uniq}"
level = node.name[1].to_i # get this header level
if level == current_header.level
# same as previous
parent = current_header.parent
elsif level > current_header.level
# larger (weaker) than previous
parent = current_header
else
# smaller (stronger) than previous
# search parent
parent = current_header
parent = parent.parent while parent.level >= level
end
current_header = HeaderNode.new(node: node, href: href, previous_header: current_header)
# create header-node and push as child
header_node = HeaderNode.new(level, href, text, [], parent)
parent.children.push(header_node)
current_header = header_node
header_content.add_previous_sibling(anchor_tag("#{namespace}#{href}", href))
header_content.add_previous_sibling(anchor_tag(href))
end
end
# extract header-tree
if header_root.children.length > 0
result[:toc] = %Q{<ul class="section-nav">\n}
result[:toc] = %q{<ul class="section-nav">}
header_root.children.each do |child|
push_toc(child)
end
result[:toc] << '</ul>'
end
@ -81,20 +58,65 @@ module Banzai
private
def anchor_tag(id, href)
%Q{<a id="#{id}" class="anchor" href="##{href}" aria-hidden="true"></a>}
def anchor_tag(href)
%Q{<a id="user-content-#{href}" class="anchor" href="##{href}" aria-hidden="true"></a>}
end
def push_toc(header_node)
result[:toc] << %Q{<li><a href="##{header_node.href}">#{header_node.text}</a>}
if header_node.children.length > 0
result[:toc] << '<ul>'
header_node.children.each do |child|
push_toc(child)
end
result[:toc] << '</ul>'
end
result[:toc] << '</li>\n'
result[:toc] << '</li>'
end
class HeaderNode
attr_reader :node, :href, :parent, :children
def initialize(node: nil, href: nil, previous_header: nil)
@node = node
@href = href
@children = []
find_parent(previous_header)
end
def level
return 0 unless node
@level ||= node.name[1].to_i
end
def text
return '' unless node
@text ||= node.text
end
private
def find_parent(previous_header)
return unless previous_header
if level == previous_header.level
@parent = previous_header.parent
elsif level > previous_header.level
@parent = previous_header
else
@parent = previous_header
@parent = @parent.parent while @parent.level >= level
end
@parent.children.push(self)
end
end
end
end

View File

@ -78,7 +78,7 @@ describe Banzai::Filter::TableOfContentsFilter do
HTML::Pipeline.new([described_class]).call(html)
end
let(:results) { result(header(1, 'Header 1') + header(2, 'Header 1-1') + header(3, 'Header 1-1-1') + header(2, 'Header 1-2') + header(1, 'Header 2') + header(2, 'Header 2-1')) }
let(:results) { result(header(1, 'Header 1') + header(2, 'Header 2')) }
let(:doc) { Nokogiri::XML::DocumentFragment.parse(results[:toc]) }
it 'is contained within a `ul` element' do
@ -87,46 +87,50 @@ describe Banzai::Filter::TableOfContentsFilter do
end
it 'contains an `li` element for each header' do
expect(doc.css('li').length).to eq 6
expect(doc.css('li').length).to eq 2
links = doc.css('li a')
expect(links[0].attr('href')).to eq '#header-1'
expect(links[0].text).to eq 'Header 1'
expect(links[1].attr('href')).to eq '#header-1-1'
expect(links[1].text).to eq 'Header 1-1'
expect(links[2].attr('href')).to eq '#header-1-1-1'
expect(links[2].text).to eq 'Header 1-1-1'
expect(links[3].attr('href')).to eq '#header-1-2'
expect(links[3].text).to eq 'Header 1-2'
expect(links[4].attr('href')).to eq '#header-2'
expect(links[4].text).to eq 'Header 2'
expect(links[5].attr('href')).to eq '#header-2-1'
expect(links[5].text).to eq 'Header 2-1'
expect(links.first.attr('href')).to eq '#header-1'
expect(links.first.text).to eq 'Header 1'
expect(links.last.attr('href')).to eq '#header-2'
expect(links.last.text).to eq 'Header 2'
end
it 'keeps list levels regarding header levels' do
items = doc.css('li')
context 'table of contents nesting' do
let(:results) do
result(
header(1, 'Header 1') <<
header(2, 'Header 1-1') <<
header(3, 'Header 1-1-1') <<
header(2, 'Header 1-2') <<
header(1, 'Header 2') <<
header(2, 'Header 2-1')
)
end
# Header 1
expect(items[0].ancestors.any? {|node| node.name == 'li'}).to eq false
it 'keeps list levels regarding header levels' do
items = doc.css('li')
# Header 1-1
expect(items[1].ancestors.include?(items[0])).to eq true
# Header 1
expect(items[0].ancestors).to satisfy_none { |node| node.name == 'li'}
# Header 1-1-1
expect(items[2].ancestors.include?(items[0])).to eq true
expect(items[2].ancestors.include?(items[1])).to eq true
# Header 1-1
expect(items[1].ancestors).to include(items[0])
# Header 1-2
expect(items[3].ancestors.include?(items[0])).to eq true
expect(items[3].ancestors.include?(items[1])).to eq false
# Header 1-1-1
expect(items[2].ancestors).to include(items[0], items[1])
# Header 2
expect(items[4].ancestors.any? {|node| node.name == 'li'}).to eq false
# Header 1-2
expect(items[3].ancestors).to include(items[0])
expect(items[3].ancestors).not_to include(items[1])
# Header 2-1
expect(items[5].ancestors.include?(items[4])).to eq true
# Header 2
expect(items[4].ancestors).to satisfy_none { |node| node.name == 'li'}
# Header 2-1
expect(items[5].ancestors).to include(items[4])
end
end
end
end