From 054df415f94abe1e517a729e53cdb325d592d31b Mon Sep 17 00:00:00 2001
From: Yorick Peterse
Date: Wed, 30 Dec 2015 18:16:53 +0100
Subject: [PATCH] Optimize CSS expressions produced by Nokogiri

Nokogiri produces inefficient XPath expressions when given CSS
expressions such as "a.gfm". Luckily these expressions can be optimized
quite easily while still achieving the same results. In the two cases
where this optimization is applied the run time has been reduced from
around 170 ms to around 15 ms.
---
 lib/banzai/filter/redactor_filter.rb           |  2 +-
 lib/banzai/filter/reference_gatherer_filter.rb |  2 +-
 lib/banzai/querying.rb                         | 18 ++++++++++++++++++
 spec/lib/banzai/querying_spec.rb               | 13 +++++++++++++
 4 files changed, 33 insertions(+), 2 deletions(-)
 create mode 100644 lib/banzai/querying.rb
 create mode 100644 spec/lib/banzai/querying_spec.rb

diff --git a/lib/banzai/filter/redactor_filter.rb b/lib/banzai/filter/redactor_filter.rb
index f01a32b5ae5..66f77902319 100644
--- a/lib/banzai/filter/redactor_filter.rb
+++ b/lib/banzai/filter/redactor_filter.rb
@@ -10,7 +10,7 @@ module Banzai
     #
     class RedactorFilter < HTML::Pipeline::Filter
       def call
-        doc.css('a.gfm').each do |node|
+        Querying.css(doc, 'a.gfm').each do |node|
           unless user_can_see_reference?(node)
             # The reference should be replaced by the original text,
             # which is not always the same as the rendered text.
diff --git a/lib/banzai/filter/reference_gatherer_filter.rb b/lib/banzai/filter/reference_gatherer_filter.rb
index 12412ff7ea9..bef04112919 100644
--- a/lib/banzai/filter/reference_gatherer_filter.rb
+++ b/lib/banzai/filter/reference_gatherer_filter.rb
@@ -16,7 +16,7 @@ module Banzai
       end
 
       def call
-        doc.css('a.gfm').each do |node|
+        Querying.css(doc, 'a.gfm').each do |node|
           gather_references(node)
         end
 
diff --git a/lib/banzai/querying.rb b/lib/banzai/querying.rb
new file mode 100644
index 00000000000..1e1b51e683e
--- /dev/null
+++ b/lib/banzai/querying.rb
@@ -0,0 +1,18 @@
+module Banzai
+  module Querying
+    # Searches a Nokogiri document using a CSS query, optionally optimizing it
+    # whenever possible.
+    #
+    # document - A document/element to search.
+    # query - The CSS query to use.
+    #
+    # Returns a Nokogiri::XML::NodeSet.
+    def self.css(document, query)
+      # When using "a.foo" Nokogiri compiles this to "//a[...]" but
+      # "descendant::a[...]" is quite a bit faster and achieves the same result.
+      xpath = Nokogiri::CSS.xpath_for(query)[0].gsub(%r{^//}, 'descendant::')
+
+      document.xpath(xpath)
+    end
+  end
+end
diff --git a/spec/lib/banzai/querying_spec.rb b/spec/lib/banzai/querying_spec.rb
new file mode 100644
index 00000000000..27da2a7439c
--- /dev/null
+++ b/spec/lib/banzai/querying_spec.rb
@@ -0,0 +1,13 @@
+require 'spec_helper'
+
+describe Banzai::Querying do
+  describe '.css' do
+    it 'optimizes queries for elements with classes' do
+      document = double(:document)
+
+      expect(document).to receive(:xpath).with(/^descendant::a/)
+
+      described_class.css(document, 'a.gfm')
+    end
+  end
+end