From 3077cb52d904154b98ee3e9aced5b3aadae86941 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 30 Dec 2015 17:16:02 +0100 Subject: [PATCH 1/3] Use XPath for searching link nodes This is a tad faster than letting Nokogiri figure out whether it should evaluate the query as CSS or XPath and then actually evaluating it. --- lib/banzai/filter/reference_filter.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/banzai/filter/reference_filter.rb b/lib/banzai/filter/reference_filter.rb index 8ca05ace88c..7198a8b03e2 100644 --- a/lib/banzai/filter/reference_filter.rb +++ b/lib/banzai/filter/reference_filter.rb @@ -124,7 +124,7 @@ module Banzai def replace_link_nodes_with_text(pattern) return doc if project.nil? - doc.search('a').each do |node| + doc.xpath('descendant-or-self::a').each do |node| klass = node.attr('class') next if klass && klass.include?('gfm') @@ -162,7 +162,7 @@ module Banzai def replace_link_nodes_with_href(pattern) return doc if project.nil? - doc.search('a').each do |node| + doc.xpath('descendant-or-self::a').each do |node| klass = node.attr('class') next if klass && klass.include?('gfm') From d3951dfaa13b9aaf11695ef10fa63456ac75cc48 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 30 Dec 2015 17:16:59 +0100 Subject: [PATCH 2/3] Don't use delegate to delegate trivial methods Around 300 ms (in total) would be spent in these delegated methods due to the extra stuff ActiveSupport adds to the compiled methods. Because these delegations are so simple we can just manually define the methods, saving around 275 milliseconds. --- lib/banzai/filter/abstract_reference_filter.rb | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/banzai/filter/abstract_reference_filter.rb b/lib/banzai/filter/abstract_reference_filter.rb index 63ad8910c0f..230387c8383 100644 --- a/lib/banzai/filter/abstract_reference_filter.rb +++ b/lib/banzai/filter/abstract_reference_filter.rb @@ -47,7 +47,17 @@ module Banzai { object_sym => LazyReference.new(object_class, node.attr(data_reference)) } end - delegate :object_class, :object_sym, :references_in, to: :class + def object_class + self.class.object_class + end + + def object_sym + self.class.object_sym + end + + def references_in(*args, &block) + self.class.references_in(*args, &block) + end def find_object(project, id) # Implement in child class From 054df415f94abe1e517a729e53cdb325d592d31b Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 30 Dec 2015 18:16:53 +0100 Subject: [PATCH 3/3] Optimize CSS expressions produced by Nokogiri Nokogiri produces inefficient XPath expressions when given CSS expressions such as "a.gfm". Luckily these expressions can be optimized quite easily while still achieving the same results. In the two cases where this optimization is applied the run time has been reduced from around 170 ms to around 15 ms. --- lib/banzai/filter/redactor_filter.rb | 2 +- lib/banzai/filter/reference_gatherer_filter.rb | 2 +- lib/banzai/querying.rb | 18 ++++++++++++++++++ spec/lib/banzai/querying_spec.rb | 13 +++++++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 lib/banzai/querying.rb create mode 100644 spec/lib/banzai/querying_spec.rb diff --git a/lib/banzai/filter/redactor_filter.rb b/lib/banzai/filter/redactor_filter.rb index f01a32b5ae5..66f77902319 100644 --- a/lib/banzai/filter/redactor_filter.rb +++ b/lib/banzai/filter/redactor_filter.rb @@ -10,7 +10,7 @@ module Banzai # class RedactorFilter < HTML::Pipeline::Filter def call - doc.css('a.gfm').each do |node| + Querying.css(doc, 'a.gfm').each do |node| unless user_can_see_reference?(node) # The reference should be replaced by the original text, # which is not always the same as the rendered text. diff --git a/lib/banzai/filter/reference_gatherer_filter.rb b/lib/banzai/filter/reference_gatherer_filter.rb index 12412ff7ea9..bef04112919 100644 --- a/lib/banzai/filter/reference_gatherer_filter.rb +++ b/lib/banzai/filter/reference_gatherer_filter.rb @@ -16,7 +16,7 @@ module Banzai end def call - doc.css('a.gfm').each do |node| + Querying.css(doc, 'a.gfm').each do |node| gather_references(node) end diff --git a/lib/banzai/querying.rb b/lib/banzai/querying.rb new file mode 100644 index 00000000000..1e1b51e683e --- /dev/null +++ b/lib/banzai/querying.rb @@ -0,0 +1,18 @@ +module Banzai + module Querying + # Searches a Nokogiri document using a CSS query, optionally optimizing it + # whenever possible. + # + # document - A document/element to search. + # query - The CSS query to use. + # + # Returns a Nokogiri::XML::NodeSet. + def self.css(document, query) + # When using "a.foo" Nokogiri compiles this to "//a[...]" but + # "descendant::a[...]" is quite a bit faster and achieves the same result. + xpath = Nokogiri::CSS.xpath_for(query)[0].gsub(%r{^//}, 'descendant::') + + document.xpath(xpath) + end + end +end diff --git a/spec/lib/banzai/querying_spec.rb b/spec/lib/banzai/querying_spec.rb new file mode 100644 index 00000000000..27da2a7439c --- /dev/null +++ b/spec/lib/banzai/querying_spec.rb @@ -0,0 +1,13 @@ +require 'spec_helper' + +describe Banzai::Querying do + describe '.css' do + it 'optimizes queries for elements with classes' do + document = double(:document) + + expect(document).to receive(:xpath).with(/^descendant::a/) + + described_class.css(document, 'a.gfm') + end + end +end