From 7d69ff3ddf0fb83c6a1ec02f85b01b454080b647 Mon Sep 17 00:00:00 2001 From: Timothy Andrew Date: Fri, 16 Sep 2016 11:34:49 +0530 Subject: [PATCH] Move cycle analytics calculations to SQL. 1. Use Arel for composable queries. 2. For a project with ~10k issues, the page loads in around 600ms. Previously, a project with ~5k issues would have a ~20s page load time. --- app/models/concerns/database_median.rb | 54 ++++++++++ app/models/cycle_analytics.rb | 101 +++++++++++------- .../cycle_analytics/table_references.rb | 25 +++++ 3 files changed, 144 insertions(+), 36 deletions(-) create mode 100644 app/models/concerns/database_median.rb create mode 100644 app/models/cycle_analytics/table_references.rb diff --git a/app/models/concerns/database_median.rb b/app/models/concerns/database_median.rb new file mode 100644 index 00000000000..5af1ca772ae --- /dev/null +++ b/app/models/concerns/database_median.rb @@ -0,0 +1,54 @@ +module DatabaseMedian + extend ActiveSupport::Concern + + def median_datetime(arel_table, query_so_far, column_sym) + # TODO: MySQL + pg_median_datetime(arel_table, query_so_far, column_sym) + end + + + # https://www.periscopedata.com/blog/medians-in-sql.html + def pg_median_datetime(arel_table, query_so_far, column_sym) + # Create a CTE with the column we're operating on, row number (after sorting by the column + # we're operating on), and count of the table we're operating on (duplicated across) all rows + # of the CTE. For example, if we're looking to find the median of the `projects.star_count` + # column, the CTE might look like this: + # + # star_count | row_id | ct + # ------------+--------+---- + # 5 | 1 | 3 + # 9 | 2 | 3 + # 15 | 3 | 3 + cte_table = Arel::Table.new(("ordered_records")) + cte = Arel::Nodes::As.new(cte_table, + arel_table. + project(arel_table[column_sym].as(column_sym.to_s), + Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), + Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), + arel_table.project("COUNT(1)").as('ct')). + # Disallow negative values + where(arel_table[column_sym].gteq(zero_interval))) + + # From the CTE, select either the middle row or the middle two rows (this is accomplished + # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the + # selected rows, and this is the median value. + cte_table.project(Arel::Nodes::NamedFunction.new("AVG", + [extract_epoch(cte_table[column_sym])], + "median")). + where(Arel::Nodes::Between.new(cte_table[:row_id], + Arel::Nodes::And.new([(cte_table[:ct] / Arel::Nodes::SqlLiteral.new('2.0')), + (cte_table[:ct] / Arel::Nodes::SqlLiteral.new('2.0') + 1)]))). + with(query_so_far, cte) + end + + private + + def extract_epoch(arel_attribute) + Arel::Nodes::SqlLiteral.new("EXTRACT(EPOCH FROM \"#{arel_attribute.relation.name}\".\"#{arel_attribute.name}\")") + end + + # Need to cast '0' to an INTERVAL before we can check if the interval is positive + def zero_interval + Arel::Nodes::NamedFunction.new("CAST", [Arel::Nodes::SqlLiteral.new("'0' AS INTERVAL")]) + end +end diff --git a/app/models/cycle_analytics.rb b/app/models/cycle_analytics.rb index 17a80115fe5..c26d50d693c 100644 --- a/app/models/cycle_analytics.rb +++ b/app/models/cycle_analytics.rb @@ -1,9 +1,9 @@ class CycleAnalytics - attr_reader :from + include DatabaseMedian def initialize(project, from:) + @project = project @from = from - @queries = Queries.new(project) end def as_json(options = {}) @@ -14,65 +14,94 @@ class CycleAnalytics end def issue - calculate_metric(@queries.issues(created_after: @from), - -> (data_point) { data_point[:issue].created_at }, - [@queries.issue_first_associated_with_milestone_at, @queries.issue_first_added_to_list_label_at]) + calculate_metric!(:issue, + TableReferences.issues[:created_at], + [TableReferences.issue_metrics[:first_associated_with_milestone_at], + TableReferences.issue_metrics[:first_added_to_board_at]], + load_merge_requests: false) end def plan - calculate_metric(@queries.issues(created_after: @from), - [@queries.issue_first_associated_with_milestone_at, @queries.issue_first_added_to_list_label_at], - @queries.issue_first_mentioned_in_commit_at) + calculate_metric!(:plan, + [TableReferences.issue_metrics[:first_associated_with_milestone_at], + TableReferences.issue_metrics[:first_added_to_board_at]], + TableReferences.issue_metrics[:first_mentioned_in_commit_at], + load_merge_requests: false) end def code - calculate_metric(@queries.merge_requests_closing_issues(created_after: @from), - @queries.issue_first_mentioned_in_commit_at, - -> (data_point) { data_point[:merge_request].created_at }) + calculate_metric!(:code, + TableReferences.issue_metrics[:first_mentioned_in_commit_at], + TableReferences.merge_requests[:created_at], + load_merge_requests: true) end def test - calculate_metric(@queries.merge_requests_closing_issues(created_after: @from), - @queries.merge_request_build_started_at, - @queries.merge_request_build_finished_at) + calculate_metric!(:test, + TableReferences.merge_request_metrics[:latest_build_started_at], + TableReferences.merge_request_metrics[:latest_build_finished_at], + load_merge_requests: true) end def review - calculate_metric(@queries.merge_requests_closing_issues(created_after: @from), - -> (data_point) { data_point[:merge_request].created_at }, - @queries.merge_request_merged_at) + calculate_metric!(:review, + TableReferences.merge_requests[:created_at], + TableReferences.merge_request_metrics[:merged_at], + load_merge_requests: true) end def staging - calculate_metric(@queries.merge_requests_closing_issues(created_after: @from), - @queries.merge_request_merged_at, - @queries.merge_request_deployed_to_production_at) + calculate_metric!(:staging, + TableReferences.merge_request_metrics[:merged_at], + TableReferences.merge_request_metrics[:first_deployed_to_production_at], + load_merge_requests: true) end def production - calculate_metric(@queries.merge_requests_closing_issues(created_after: @from), - -> (data_point) { data_point[:issue].created_at }, - @queries.merge_request_deployed_to_production_at) + calculate_metric!(:production, + TableReferences.issues[:created_at], + TableReferences.merge_request_metrics[:first_deployed_to_production_at], + load_merge_requests: true) end private - def calculate_metric(data, start_time_fns, end_time_fns) - times = data.map do |data_point| - start_time = Array.wrap(start_time_fns).map { |fn| fn[data_point] }.compact.first - end_time = Array.wrap(end_time_fns).map { |fn| fn[data_point] }.compact.first + def calculate_metric!(name, start_time_attrs, end_time_attrs, load_merge_requests: false) + cte_table = Arel::Table.new("cte_table_for_#{name}") - if start_time.present? && end_time.present? && end_time >= start_time - end_time - start_time - end - end + # Add a `SELECT` for (end_time - start-time), and add an alias for it. + # Note: We use COALESCE to pick up the first non-null column for end_time / start_time. + query = Arel::Nodes::As.new( + cte_table, + base_query(load_merge_requests: load_merge_requests).project( + Arel::Nodes::Subtraction.new( + Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(end_time_attrs)), + Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(start_time_attrs)) + ).as(name.to_s))) - median(times.compact.sort) + query = median_datetime(cte_table, query, name) + median = ActiveRecord::Base.connection.execute(query.to_sql).first['median'] + median.to_f if median.present? end - def median(coll) - return if coll.empty? - size = coll.length - (coll[size / 2] + coll[(size - 1) / 2]) / 2.0 + # Join table with a row for every pair (where the merge request + # closes the given issue) with issue and merge request metrics included. The metrics + # are loaded with an inner join, so issues / merge requests without metrics are + # automatically excluded. + def base_query(load_merge_requests:) + arel_table = TableReferences.merge_requests_closing_issues + + query = arel_table.join(TableReferences.issues).on(TableReferences.issues[:id].eq(arel_table[:issue_id])). + join(TableReferences.issue_metrics).on(TableReferences.issues[:id].eq(TableReferences.issue_metrics[:issue_id])). + where(TableReferences.issues[:project_id].eq(@project.id)). + where(TableReferences.issues[:deleted_at].eq(nil)). + where(TableReferences.issues[:created_at].gteq(@from)) + + if load_merge_requests + query.join(TableReferences.merge_requests, Arel::Nodes::OuterJoin).on(TableReferences.merge_requests[:id].eq(arel_table[:merge_request_id])). + join(TableReferences.merge_request_metrics).on(TableReferences.merge_requests[:id].eq(TableReferences.merge_request_metrics[:merge_request_id])) + else + query + end end end diff --git a/app/models/cycle_analytics/table_references.rb b/app/models/cycle_analytics/table_references.rb new file mode 100644 index 00000000000..f276723ee0e --- /dev/null +++ b/app/models/cycle_analytics/table_references.rb @@ -0,0 +1,25 @@ +class CycleAnalytics + module TableReferences + class << self + def issues + Issue.arel_table + end + + def issue_metrics + Issue::Metrics.arel_table + end + + def merge_requests + MergeRequest.arel_table + end + + def merge_request_metrics + MergeRequest::Metrics.arel_table + end + + def merge_requests_closing_issues + MergeRequestsClosingIssues.arel_table + end + end + end +end