Added specs and source documentation for Sherlock

This commit is contained in:
Yorick Peterse 2015-11-05 18:01:05 +01:00
parent dec3e4ce64
commit 265ef867ff
16 changed files with 757 additions and 11 deletions

View file

@ -1,5 +1,10 @@
module Gitlab
module Sherlock
# A collection of transactions recorded by Sherlock.
#
# Method calls for this class are synchronized using a mutex to allow
# sharing of a single Collection instance between threads (e.g. when using
# Puma as a webserver).
class Collection
include Enumerable
@ -34,6 +39,8 @@ module Gitlab
sort { |a, b| b.finished_at <=> a.finished_at }
end
private
def synchronize(&block)
@mutex.synchronize(&block)
end

View file

@ -3,6 +3,10 @@ module Gitlab
class FileSample
attr_reader :id, :file, :line_samples, :events, :duration
# file - The full path to the file this sample belongs to.
# line_samples - An array of LineSample objects.
# duration - The total execution time in milliseconds.
# events - The total amount of events.
def initialize(file, line_samples, duration, events)
@id = SecureRandom.uuid
@file = file

View file

@ -1,12 +1,36 @@
module Gitlab
module Sherlock
# Class for profiling code on a per line basis.
#
# The LineProfiler class can be used to profile code on per line basis
# without littering your code with Ruby implementation specific profiling
# methods.
#
# This profiler only includes samples taking longer than a given threshold
# and those that occur in the actual application (e.g. files from Gems are
# ignored).
class LineProfiler
# The minimum amount of time that has to be spent in a file for it to be
# included in a list of samples.
MINIMUM_DURATION = 10.0
# Profiles the given block.
#
# Example:
#
# profiler = LineProfiler.new
#
# retval, samples = profiler.profile do
# "cats are amazing"
# end
#
# retval # => "cats are amazing"
# samples # => [#<Gitlab::Sherlock::FileSample ...>, ...]
#
# Returns an Array containing the block's return value and an Array of
# FileSample objects.
def profile(&block)
if RUBY_ENGINE == 'ruby'
if mri?
profile_mri(&block)
else
raise NotImplementedError,
@ -14,6 +38,7 @@ module Gitlab
end
end
# Profiles the given block using rblineprof (MRI only).
def profile_mri
retval = nil
samples = lineprof(/^#{Rails.root.to_s}/) { retval = yield }
@ -24,6 +49,11 @@ module Gitlab
end
# Returns an Array of file samples based on the output of rblineprof.
#
# lineprof_stats - A Hash containing rblineprof statistics on a per file
# basis.
#
# Returns an Array of FileSample objects.
def aggregate_rblineprof(lineprof_stats)
samples = []
@ -52,9 +82,15 @@ module Gitlab
samples
end
private
def microsec_to_millisec(microsec)
microsec / 1000.0
end
def mri?
RUBY_ENGINE == 'ruby'
end
end
end
end

View file

@ -3,15 +3,31 @@ module Gitlab
class LineSample
attr_reader :duration, :events
# duration - The execution time in milliseconds.
# events - The amount of events.
def initialize(duration, events)
@duration = duration
@events = events
end
# Returns the sample duration percentage relative to the given duration.
#
# Example:
#
# sample.duration # => 150
# sample.percentage_of(1500) # => 10.0
#
# total_duration - The total duration to compare with.
#
# Returns a float
def percentage_of(total_duration)
(duration.to_f / total_duration) * 100.0
end
# Returns true if the current sample takes up the majority of the given
# duration.
#
# total_duration - The total duration to compare with.
def majority_of?(total_duration)
percentage_of(total_duration) >= 30
end

View file

@ -5,15 +5,19 @@ module Gitlab
SHERLOCK_DIR = File.dirname(__FILE__)
# Creates a new Location from a `Thread::Backtrace::Location`.
def self.from_ruby_location(location)
new(location.path, location.lineno)
end
# path - The full path of the frame as a String.
# line - The line number of the frame as a Fixnum.
def initialize(path, line)
@path = path
@line = line
end
# Returns true if the current frame originated from the application.
def application?
@path.start_with?(Rails.root.to_s) && !path.start_with?(SHERLOCK_DIR)
end

View file

@ -10,6 +10,7 @@ module Gitlab
@app = app
end
# env - A Hash containing Rack environment details.
def call(env)
if instrument?(env)
call_with_instrumentation(env)
@ -19,7 +20,7 @@ module Gitlab
end
def call_with_instrumentation(env)
trans = Transaction.new(env['REQUEST_METHOD'], env['REQUEST_URI'])
trans = transaction_from_env(env)
retval = trans.run { @app.call(env) }
Sherlock.collection.add(trans)
@ -31,6 +32,10 @@ module Gitlab
!!(env['HTTP_ACCEPT'] =~ CONTENT_TYPES &&
env['REQUEST_URI'] !~ IGNORE_PATHS)
end
def transaction_from_env(env)
Transaction.new(env['REQUEST_METHOD'], env['REQUEST_URI'])
end
end
end
end

View file

@ -3,6 +3,7 @@ module Gitlab
class Query
attr_reader :id, :query, :started_at, :finished_at, :backtrace
# SQL identifiers that should be prefixed with newlines.
PREFIX_NEWLINE = /
\s+(FROM
|(LEFT|RIGHT)?INNER\s+JOIN
@ -12,11 +13,20 @@ module Gitlab
|GROUP\s+BY
|ORDER\s+BY
|LIMIT
|OFFSET)\s+
/ix
|OFFSET)\s+/ix # Vim indent breaks when this is on a newline :<
# Creates a new Query using a String and a separate Array of bindings.
#
# query - A String containing a SQL query, optionally with numeric
# placeholders (`$1`, `$2`, etc).
#
# bindings - An Array of ActiveRecord columns and their values.
# started_at - The start time of the query as a Time-like object.
# finished_at - The completion time of the query as a Time-like object.
#
# Returns a new Query object.
def self.new_with_bindings(query, bindings, started_at, finished_at)
bindings.each_with_index do |(column, value), index|
bindings.each_with_index do |(_, value), index|
quoted_value = ActiveRecord::Base.connection.quote(value)
query = query.gsub("$#{index + 1}", quoted_value)
@ -25,6 +35,9 @@ module Gitlab
new(query, started_at, finished_at)
end
# query - The SQL query as a String (without placeholders).
# started_at - The start time of the query as a Time-like object.
# finished_at - The completion time of the query as a Time-like object.
def initialize(query, started_at, finished_at)
@id = SecureRandom.uuid
@query = query
@ -39,6 +52,7 @@ module Gitlab
end
end
# Returns the query duration in milliseconds.
def duration
@duration ||= (@finished_at - @started_at) * 1000.0
end
@ -47,18 +61,22 @@ module Gitlab
@id
end
# Returns a human readable version of the query.
def formatted_query
@formatted_query ||= format_sql(@query)
end
# Returns the last application frame of the backtrace.
def last_application_frame
@last_application_frame ||= @backtrace.find(&:application?)
end
# Returns an Array of application frames (excluding Gems and the likes).
def application_backtrace
@application_backtrace ||= @backtrace.select(&:application?)
end
# Returns the query plan as a String.
def explain
unless @explain
ActiveRecord::Base.connection.transaction do

View file

@ -4,11 +4,12 @@ module Gitlab
attr_reader :id, :type, :path, :queries, :file_samples, :started_at,
:finished_at
# type - The type of transaction (e.g. "GET", "POST", etc)
# path - The path of the transaction (e.g. the HTTP request path)
def initialize(type, path)
@id = SecureRandom.uuid
@type = type
@path = path
@duration = 0
@queries = []
@file_samples = []
@started_at = nil
@ -16,6 +17,7 @@ module Gitlab
@thread = Thread.current
end
# Runs the transaction and returns the block's return value.
def run
@started_at = Time.now
@ -30,34 +32,43 @@ module Gitlab
retval
end
# Returns the duration in seconds.
def duration
@started_at && @finished_at ? @finished_at - @started_at : 0
@duration ||= started_at && finished_at ? finished_at - started_at : 0
end
def to_param
@id
end
# Returns the queries sorted in descending order by their durations.
def sorted_queries
@queries.sort { |a, b| b.duration <=> a.duration }
end
# Returns the file samples sorted in descending order by their durations.
def sorted_file_samples
@file_samples.sort { |a, b| b.duration <=> a.duration }
end
# Finds a query by the given ID.
#
# id - The query ID as a String.
#
# Returns a Query object if one could be found, nil otherwise.
def find_query(id)
@queries.find { |query| query.id == id }
end
# Finds a file sample by the given ID.
#
# id - The query ID as a String.
#
# Returns a FileSample object if one could be found, nil otherwise.
def find_file_sample(id)
@file_samples.find { |sample| sample.id == id }
end
def track_query(query, bindings, start, finish)
@queries << Query.new_with_bindings(query, bindings, start, finish)
end
def profile_lines
retval = nil
@ -70,6 +81,12 @@ module Gitlab
retval
end
private
def track_query(query, bindings, start, finish)
@queries << Query.new_with_bindings(query, bindings, start, finish)
end
def subscribe_to_active_record
ActiveSupport::Notifications.subscribe('sql.active_record') do |_, start, finish, _, data|
# In case somebody uses a multi-threaded server locally (e.g. Puma) we

View file

@ -0,0 +1,82 @@
require 'spec_helper'
describe Gitlab::Sherlock::Collection do
let(:collection) { described_class.new }
let(:transaction) do
Gitlab::Sherlock::Transaction.new('POST', '/cat_pictures')
end
describe '#add' do
it 'adds a new transaction' do
collection.add(transaction)
expect(collection).to_not be_empty
end
it 'is aliased as <<' do
collection << transaction
expect(collection).to_not be_empty
end
end
describe '#each' do
it 'iterates over every transaction' do
collection.add(transaction)
expect { |b| collection.each(&b) }.to yield_with_args(transaction)
end
end
describe '#clear' do
it 'removes all transactions' do
collection.add(transaction)
collection.clear
expect(collection).to be_empty
end
end
describe '#empty?' do
it 'returns true for an empty collection' do
expect(collection).to be_empty
end
it 'returns false for a collection with a transaction' do
collection.add(transaction)
expect(collection).to_not be_empty
end
end
describe '#find_transaction' do
it 'returns the transaction for the given ID' do
collection.add(transaction)
expect(collection.find_transaction(transaction.id)).to eq(transaction)
end
it 'returns nil when no transaction could be found' do
collection.add(transaction)
expect(collection.find_transaction('cats')).to be_nil
end
end
describe '#newest_first' do
it 'returns transactions sorted from new to old' do
trans1 = Gitlab::Sherlock::Transaction.new('POST', '/cat_pictures')
trans2 = Gitlab::Sherlock::Transaction.new('POST', '/more_cat_pictures')
allow(trans1).to receive(:finished_at).and_return(Time.utc(2015, 1, 1))
allow(trans2).to receive(:finished_at).and_return(Time.utc(2015, 1, 2))
collection.add(trans1)
collection.add(trans2)
expect(collection.newest_first).to eq([trans2, trans1])
end
end
end

View file

@ -0,0 +1,54 @@
require 'spec_helper'
describe Gitlab::Sherlock::FileSample do
let(:sample) { described_class.new(__FILE__, [], 150.4, 2) }
describe '#id' do
it 'returns the ID' do
expect(sample.id).to be_an_instance_of(String)
end
end
describe '#file' do
it 'returns the file path' do
expect(sample.file).to eq(__FILE__)
end
end
describe '#line_samples' do
it 'returns the line samples' do
expect(sample.line_samples).to eq([])
end
end
describe '#events' do
it 'returns the total number of events' do
expect(sample.events).to eq(2)
end
end
describe '#duration' do
it 'returns the total execution time' do
expect(sample.duration).to eq(150.4)
end
end
describe '#relative_path' do
it 'returns the relative path' do
expect(sample.relative_path).
to eq('spec/lib/gitlab/sherlock/file_sample_spec.rb')
end
end
describe '#to_param' do
it 'returns the sample ID' do
expect(sample.to_param).to eq(sample.id)
end
end
describe '#source' do
it 'returns the contents of the file' do
expect(sample.source).to eq(File.read(__FILE__))
end
end
end

View file

@ -0,0 +1,73 @@
require 'spec_helper'
describe Gitlab::Sherlock::LineProfiler do
let(:profiler) { described_class.new }
describe '#profile' do
it 'runs the profiler when using MRI' do
allow(profiler).to receive(:mri?).and_return(true)
allow(profiler).to receive(:profile_mri)
profiler.profile { 'cats' }
end
it 'raises NotImplementedError when profiling an unsupported platform' do
allow(profiler).to receive(:mri?).and_return(false)
expect { profiler.profile { 'cats' } }.to raise_error(NotImplementedError)
end
end
describe '#profile_mri' do
it 'returns an Array containing the return value and profiling samples' do
allow(profiler).to receive(:lineprof).
and_yield.
and_return({ __FILE__ => [[0, 0, 0, 0]] })
retval, samples = profiler.profile_mri { 42 }
expect(retval).to eq(42)
expect(samples).to eq([])
end
end
describe '#aggregate_rblineprof' do
let(:raw_samples) do
{ __FILE__ => [[30000, 30000, 5, 0], [15000, 15000, 4, 0]] }
end
it 'returns an Array of FileSample objects' do
samples = profiler.aggregate_rblineprof(raw_samples)
expect(samples).to be_an_instance_of(Array)
expect(samples[0]).to be_an_instance_of(Gitlab::Sherlock::FileSample)
end
describe 'the first FileSample object' do
let(:file_sample) do
profiler.aggregate_rblineprof(raw_samples)[0]
end
it 'uses the correct file path' do
expect(file_sample.file).to eq(__FILE__)
end
it 'contains a list of line samples' do
line_sample = file_sample.line_samples[0]
expect(line_sample).to be_an_instance_of(Gitlab::Sherlock::LineSample)
expect(line_sample.duration).to eq(15.0)
expect(line_sample.events).to eq(4)
end
it 'contains the total file execution time' do
expect(file_sample.duration).to eq(30.0)
end
it 'contains the total amount of file events' do
expect(file_sample.events).to eq(5)
end
end
end
end

View file

@ -0,0 +1,33 @@
require 'spec_helper'
describe Gitlab::Sherlock::LineSample do
let(:sample) { described_class.new(150.0, 4) }
describe '#duration' do
it 'returns the duration' do
expect(sample.duration).to eq(150.0)
end
end
describe '#events' do
it 'returns the amount of events' do
expect(sample.events).to eq(4)
end
end
describe '#percentage_of' do
it 'returns the percentage of 1500.0' do
expect(sample.percentage_of(1500.0)).to be_within(0.1).of(10.0)
end
end
describe '#majority_of' do
it 'returns true if the sample takes up the majority of the given duration' do
expect(sample.majority_of?(500.0)).to eq(true)
end
it "returns false if the sample doesn't take up the majority of the given duration" do
expect(sample.majority_of?(1500.0)).to eq(false)
end
end
end

View file

@ -0,0 +1,40 @@
require 'spec_helper'
describe Gitlab::Sherlock::Location do
let(:location) { described_class.new(__FILE__, 1) }
describe 'from_ruby_location' do
it 'creates a Location from a Thread::Backtrace::Location' do
input = caller_locations[0]
output = described_class.from_ruby_location(input)
expect(output).to be_an_instance_of(described_class)
expect(output.path).to eq(input.path)
expect(output.line).to eq(input.lineno)
end
end
describe '#path' do
it 'returns the file path' do
expect(location.path).to eq(__FILE__)
end
end
describe '#line' do
it 'returns the line number' do
expect(location.line).to eq(1)
end
end
describe '#application?' do
it 'returns true for an application frame' do
expect(location.application?).to eq(true)
end
it 'returns false for a non application frame' do
loc = described_class.new('/tmp/cats.rb', 1)
expect(loc.application?).to eq(false)
end
end
end

View file

@ -0,0 +1,79 @@
require 'spec_helper'
describe Gitlab::Sherlock::Middleware do
let(:app) { double(:app) }
let(:middleware) { described_class.new(app) }
describe '#call' do
describe 'when instrumentation is enabled' do
it 'instruments a request' do
allow(middleware).to receive(:instrument?).and_return(true)
allow(middleware).to receive(:call_with_instrumentation)
middleware.call({})
end
end
describe 'when instrumentation is disabled' do
it "doesn't instrument a request" do
allow(middleware).to receive(:instrument).and_return(false)
allow(app).to receive(:call)
middleware.call({})
end
end
end
describe '#call_with_instrumentation' do
it 'instruments a request' do
trans = double(:transaction)
retval = 'cats are amazing'
env = {}
allow(app).to receive(:call).with(env).and_return(retval)
allow(middleware).to receive(:transaction_from_env).and_return(trans)
allow(trans).to receive(:run).and_yield.and_return(retval)
allow(Gitlab::Sherlock.collection).to receive(:add).with(trans)
middleware.call_with_instrumentation(env)
end
end
describe '#instrument?' do
it 'returns false for a text/css request' do
env = { 'HTTP_ACCEPT' => 'text/css', 'REQUEST_URI' => '/' }
expect(middleware.instrument?(env)).to eq(false)
end
it 'returns false for a request to a Sherlock route' do
env = {
'HTTP_ACCEPT' => 'text/html',
'REQUEST_URI' => '/sherlock/transactions'
}
expect(middleware.instrument?(env)).to eq(false)
end
it 'returns true for a request that should be instrumented' do
env = {
'HTTP_ACCEPT' => 'text/html',
'REQUEST_URI' => '/cats'
}
expect(middleware.instrument?(env)).to eq(true)
end
end
describe '#transaction_from_env' do
it 'returns a Transaction' do
env = {
'HTTP_ACCEPT' => 'text/html',
'REQUEST_URI' => '/cats'
}
expect(middleware.transaction_from_env(env)).
to be_an_instance_of(Gitlab::Sherlock::Transaction)
end
end
end

View file

@ -0,0 +1,113 @@
require 'spec_helper'
describe Gitlab::Sherlock::Query do
let(:started_at) { Time.utc(2015, 1, 1) }
let(:finished_at) { started_at + 5 }
let(:query) do
described_class.new('SELECT COUNT(*) FROM users', started_at, finished_at)
end
describe 'new_with_bindings' do
it 'returns a Query' do
sql = 'SELECT COUNT(*) FROM users WHERE id = $1'
bindings = [[double(:column), 10]]
query = described_class.
new_with_bindings(sql, bindings, started_at, finished_at)
expect(query.query).to eq('SELECT COUNT(*) FROM users WHERE id = 10;')
end
end
describe '#id' do
it 'returns a String' do
expect(query.id).to be_an_instance_of(String)
end
end
describe '#query' do
it 'returns the query with a trailing semi-colon' do
expect(query.query).to eq('SELECT COUNT(*) FROM users;')
end
end
describe '#started_at' do
it 'returns the start time' do
expect(query.started_at).to eq(started_at)
end
end
describe '#finished_at' do
it 'returns the completion time' do
expect(query.finished_at).to eq(finished_at)
end
end
describe '#backtrace' do
it 'returns the backtrace' do
expect(query.backtrace).to be_an_instance_of(Array)
end
end
describe '#duration' do
it 'returns the duration in milliseconds' do
expect(query.duration).to be_within(0.1).of(5000.0)
end
end
describe '#to_param' do
it 'returns the query ID' do
expect(query.to_param).to eq(query.id)
end
end
describe '#formatted_query' do
it 'returns a formatted version of the query' do
expect(query.formatted_query).to eq(<<-EOF.strip)
SELECT COUNT(*)
FROM users;
EOF
end
end
describe '#last_application_frame' do
it 'returns the last application frame' do
frame = query.last_application_frame
expect(frame).to be_an_instance_of(Gitlab::Sherlock::Location)
expect(frame.path).to eq(__FILE__)
end
end
describe '#application_backtrace' do
it 'returns an Array of application frames' do
frames = query.application_backtrace
expect(frames).to be_an_instance_of(Array)
expect(frames).to_not be_empty
frames.each do |frame|
expect(frame.path).to start_with(Rails.root.to_s)
end
end
end
describe '#explain' do
it 'returns the query plan as a String' do
lines = [
['Aggregate (cost=123 rows=1)'],
[' -> Index Only Scan using index_cats_are_amazing']
]
result = double(:result, :values => lines)
allow(query).to receive(:raw_explain).and_return(result)
expect(query.explain).to eq(<<-EOF.strip)
Aggregate (cost=123 rows=1)
-> Index Only Scan using index_cats_are_amazing
EOF
end
end
end

View file

@ -0,0 +1,165 @@
require 'spec_helper'
describe Gitlab::Sherlock::Transaction do
let(:transaction) { described_class.new('POST', '/cat_pictures') }
describe '#id' do
it 'returns the transaction ID' do
expect(transaction.id).to be_an_instance_of(String)
end
end
describe '#type' do
it 'returns the type' do
expect(transaction.type).to eq('POST')
end
end
describe '#path' do
it 'returns the path' do
expect(transaction.path).to eq('/cat_pictures')
end
end
describe '#queries' do
it 'returns an Array of queries' do
expect(transaction.queries).to be_an_instance_of(Array)
end
end
describe '#file_samples' do
it 'returns an Array of file samples' do
expect(transaction.file_samples).to be_an_instance_of(Array)
end
end
describe '#started_at' do
it 'returns the start time' do
allow(transaction).to receive(:profile_lines).and_yield
transaction.run { 'cats are amazing' }
expect(transaction.started_at).to be_an_instance_of(Time)
end
end
describe '#finished_at' do
it 'returns the completion time' do
allow(transaction).to receive(:profile_lines).and_yield
transaction.run { 'cats are amazing' }
expect(transaction.finished_at).to be_an_instance_of(Time)
end
end
describe '#run' do
it 'runs the transaction' do
allow(transaction).to receive(:profile_lines).and_yield
retval = transaction.run { 'cats are amazing' }
expect(retval).to eq('cats are amazing')
end
end
describe '#duration' do
it 'returns the duration in seconds' do
start_time = Time.now
allow(transaction).to receive(:started_at).and_return(start_time)
allow(transaction).to receive(:finished_at).and_return(start_time + 5)
expect(transaction.duration).to be_within(0.1).of(5.0)
end
end
describe '#to_param' do
it 'returns the transaction ID' do
expect(transaction.to_param).to eq(transaction.id)
end
end
describe '#sorted_queries' do
it 'returns the queries in descending order' do
start_time = Time.now
query1 = Gitlab::Sherlock::Query.new('SELECT 1', start_time, start_time)
query2 = Gitlab::Sherlock::Query.
new('SELECT 2', start_time, start_time + 5)
transaction.queries << query1
transaction.queries << query2
expect(transaction.sorted_queries).to eq([query2, query1])
end
end
describe '#sorted_file_samples' do
it 'returns the file samples in descending order' do
sample1 = Gitlab::Sherlock::FileSample.new(__FILE__, [], 10.0, 1)
sample2 = Gitlab::Sherlock::FileSample.new(__FILE__, [], 15.0, 1)
transaction.file_samples << sample1
transaction.file_samples << sample2
expect(transaction.sorted_file_samples).to eq([sample2, sample1])
end
end
describe '#find_query' do
it 'returns a Query when found' do
query = Gitlab::Sherlock::Query.new('SELECT 1', Time.now, Time.now)
transaction.queries << query
expect(transaction.find_query(query.id)).to eq(query)
end
it 'returns nil when no query could be found' do
expect(transaction.find_query('cats')).to be_nil
end
end
describe '#find_file_sample' do
it 'returns a FileSample when found' do
sample = Gitlab::Sherlock::FileSample.new(__FILE__, [], 10.0, 1)
transaction.file_samples << sample
expect(transaction.find_file_sample(sample.id)).to eq(sample)
end
it 'returns nil when no file sample could be found' do
expect(transaction.find_file_sample('cats')).to be_nil
end
end
describe '#profile_lines' do
describe 'when line profiling is enabled' do
it 'yields the block using the line profiler' do
allow(Gitlab::Sherlock).to receive(:enable_line_profiler?).
and_return(true)
allow_any_instance_of(Gitlab::Sherlock::LineProfiler).
to receive(:profile).and_return('cats are amazing', [])
retval = transaction.profile_lines { 'cats are amazing' }
expect(retval).to eq('cats are amazing')
end
end
describe 'when line profiling is disabled' do
it 'yields the block' do
allow(Gitlab::Sherlock).to receive(:enable_line_profiler?).
and_return(false)
retval = transaction.profile_lines { 'cats are amazing' }
expect(retval).to eq('cats are amazing')
end
end
end
end