From 8957293d9bd0d711db3af26182205c2fe4125194 Mon Sep 17 00:00:00 2001 From: Timothy Andrew Date: Tue, 20 Sep 2016 15:06:54 +0530 Subject: [PATCH] Implement review comments from @yorickpeterse 1. Change multiple updates to a single `update_all` 2. Use cascading deletes 3. Extract an average function for the database median. 4. Move database median to `lib/gitlab/database` 5. Use `delete_all` instead of `destroy_all` 6. Minor refactoring --- app/models/ci/pipeline.rb | 17 ++-- app/models/concerns/database_median.rb | 76 ------------------ app/models/cycle_analytics.rb | 6 +- app/models/cycle_analytics/queries.rb | 80 ------------------- app/models/deployment.rb | 31 +++++++ app/models/environment.rb | 4 + app/models/issue.rb | 6 +- app/models/issue/metrics.rb | 4 - app/models/merge_request.rb | 6 +- app/models/merge_request/metrics.rb | 12 --- app/services/create_deployment_service.rb | 28 +------ app/services/git_push_service.rb | 4 +- db/fixtures/development/17_cycle_analytics.rb | 2 +- .../20160824124900_add_table_issue_metrics.rb | 6 +- ...5052008_add_table_merge_request_metrics.rb | 6 +- ...21_create_merge_requests_closing_issues.rb | 4 +- lib/gitlab/database/median.rb | 80 +++++++++++++++++++ 17 files changed, 147 insertions(+), 225 deletions(-) delete mode 100644 app/models/concerns/database_median.rb delete mode 100644 app/models/cycle_analytics/queries.rb create mode 100644 lib/gitlab/database/median.rb diff --git a/app/models/ci/pipeline.rb b/app/models/ci/pipeline.rb index cbb4f60cd32..d0997fdec19 100644 --- a/app/models/ci/pipeline.rb +++ b/app/models/ci/pipeline.rb @@ -57,15 +57,13 @@ module Ci end after_transition [:created, :pending] => :running do |pipeline| - pipeline.merge_requests.each do |merge_request| - merge_request.metrics.record_latest_build_start_time!(pipeline.started_at) if merge_request.metrics.present? - end + MergeRequest::Metrics.where(merge_request_id: pipeline.merge_requests.map(&:id)). + update_all(latest_build_started_at: pipeline.started_at, latest_build_finished_at: nil) end after_transition any => [:success] do |pipeline| - pipeline.merge_requests.each do |merge_request| - merge_request.metrics.record_latest_build_finish_time!(pipeline.finished_at) if merge_request.metrics.present? - end + MergeRequest::Metrics.where(merge_request_id: pipeline.merge_requests.map(&:id)). + update_all(latest_build_finished_at: pipeline.finished_at) end before_transition do |pipeline| @@ -292,8 +290,11 @@ module Ci # Merge requests for which the current pipeline is running against # the merge request's latest commit. def merge_requests - project.merge_requests.where(source_branch: self.ref). - select { |merge_request| merge_request.pipeline.try(:id) == self.id } + @merge_requests ||= + begin + project.merge_requests.where(source_branch: self.ref). + select { |merge_request| merge_request.pipeline.try(:id) == self.id } + end end private diff --git a/app/models/concerns/database_median.rb b/app/models/concerns/database_median.rb deleted file mode 100644 index 19a495a05b8..00000000000 --- a/app/models/concerns/database_median.rb +++ /dev/null @@ -1,76 +0,0 @@ -# https://www.periscopedata.com/blog/medians-in-sql.html -module DatabaseMedian - extend ActiveSupport::Concern - - def median_datetime(arel_table, query_so_far, column_sym) - case ActiveRecord::Base.connection.adapter_name - when 'PostgreSQL' - pg_median_datetime(arel_table, query_so_far, column_sym) - when 'Mysql2' - mysql_median_datetime(arel_table, query_so_far, column_sym) - else - raise NotImplementedError, "We haven't implemented a database median strategy for your database type." - end - end - - def mysql_median_datetime(arel_table, query_so_far, column_sym) - query = arel_table. - from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)). - project(Arel::Nodes::NamedFunction.new("AVG", [arel_table[column_sym]]).as('median')). - where(Arel::Nodes::Between.new(Arel.sql("(select @row_id := @row_id + 1)"), - Arel::Nodes::And.new([Arel.sql('@ct/2.0'), - Arel.sql('@ct/2.0 + 1')]))). - # Disallow negative values - where(arel_table[column_sym].gteq(0)) - - [Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"), - Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"), - Arel.sql("set @row_id := 0;"), - query, - Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};")] - end - - def pg_median_datetime(arel_table, query_so_far, column_sym) - # Create a CTE with the column we're operating on, row number (after sorting by the column - # we're operating on), and count of the table we're operating on (duplicated across) all rows - # of the CTE. For example, if we're looking to find the median of the `projects.star_count` - # column, the CTE might look like this: - # - # star_count | row_id | ct - # ------------+--------+---- - # 5 | 1 | 3 - # 9 | 2 | 3 - # 15 | 3 | 3 - cte_table = Arel::Table.new("ordered_records") - cte = Arel::Nodes::As.new(cte_table, - arel_table. - project(arel_table[column_sym].as(column_sym.to_s), - Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), - Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), - arel_table.project("COUNT(1)").as('ct')). - # Disallow negative values - where(arel_table[column_sym].gteq(zero_interval))) - - # From the CTE, select either the middle row or the middle two rows (this is accomplished - # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the - # selected rows, and this is the median value. - cte_table.project(Arel::Nodes::NamedFunction.new("AVG", - [extract_epoch(cte_table[column_sym])], - "median")). - where(Arel::Nodes::Between.new(cte_table[:row_id], - Arel::Nodes::And.new([(cte_table[:ct] / Arel.sql('2.0')), - (cte_table[:ct] / Arel.sql('2.0') + 1)]))). - with(query_so_far, cte) - end - - private - - def extract_epoch(arel_attribute) - Arel.sql("EXTRACT(EPOCH FROM \"#{arel_attribute.relation.name}\".\"#{arel_attribute.name}\")") - end - - # Need to cast '0' to an INTERVAL before we can check if the interval is positive - def zero_interval - Arel::Nodes::NamedFunction.new("CAST", [Arel.sql("'0' AS INTERVAL")]) - end -end diff --git a/app/models/cycle_analytics.rb b/app/models/cycle_analytics.rb index ef3f1bbcab5..d5f1c754dad 100644 --- a/app/models/cycle_analytics.rb +++ b/app/models/cycle_analytics.rb @@ -1,5 +1,5 @@ class CycleAnalytics - include DatabaseMedian + include Gitlab::Database::Median def initialize(project, from:) @project = project @@ -122,7 +122,9 @@ class CycleAnalytics case ActiveRecord::Base.connection.adapter_name when 'PostgreSQL' - result.first['median'].to_f + result = result.first.presence + median = result['median'] if result + median.to_f if median when 'Mysql2' result.to_a.flatten.first end diff --git a/app/models/cycle_analytics/queries.rb b/app/models/cycle_analytics/queries.rb deleted file mode 100644 index 7bdf1fb6290..00000000000 --- a/app/models/cycle_analytics/queries.rb +++ /dev/null @@ -1,80 +0,0 @@ -class CycleAnalytics - class Queries - def initialize(project) - @project = project - end - - def issues(options = {}) - @issues_data ||= - begin - issues_query(options).preload(:metrics).map { |issue| { issue: issue } } - end - end - - def merge_requests_closing_issues(options = {}) - @merge_requests_closing_issues_data ||= - begin - merge_requests_closing_issues = MergeRequestsClosingIssues.where(issue: issues_query(options)).preload(issue: [:metrics], merge_request: [:metrics]) - - merge_requests_closing_issues.map do |record| - { issue: record.issue, merge_request: record.merge_request } - end - end - end - - def issue_first_associated_with_milestone_at - lambda do |data_point| - issue = data_point[:issue] - issue.metrics.first_associated_with_milestone_at if issue.metrics.present? - end - end - - def issue_first_added_to_list_label_at - lambda do |data_point| - issue = data_point[:issue] - issue.metrics.first_added_to_board_at if issue.metrics.present? - end - end - - def issue_first_mentioned_in_commit_at - lambda do |data_point| - issue = data_point[:issue] - issue.metrics.first_mentioned_in_commit_at if issue.metrics.present? - end - end - - def merge_request_merged_at - lambda do |data_point| - merge_request = data_point[:merge_request] - merge_request.metrics.merged_at if merge_request.metrics.present? - end - end - - def merge_request_build_started_at - lambda do |data_point| - merge_request = data_point[:merge_request] - merge_request.metrics.latest_build_started_at if merge_request.metrics.present? - end - end - - def merge_request_build_finished_at - lambda do |data_point| - merge_request = data_point[:merge_request] - merge_request.metrics.latest_build_finished_at if merge_request.metrics.present? - end - end - - def merge_request_deployed_to_production_at - lambda do |data_point| - merge_request = data_point[:merge_request] - merge_request.metrics.first_deployed_to_production_at if merge_request.metrics.present? - end - end - - private - - def issues_query(created_after:) - @project.issues.where("created_at >= ?", created_after) - end - end -end diff --git a/app/models/deployment.rb b/app/models/deployment.rb index 1e338889714..183d538a867 100644 --- a/app/models/deployment.rb +++ b/app/models/deployment.rb @@ -42,4 +42,35 @@ class Deployment < ActiveRecord::Base project.repository.is_ancestor?(commit.id, sha) end + + + def update_merge_request_metrics + if environment.update_merge_request_metrics? + query = project.merge_requests. + joins(:metrics). + where(target_branch: self.ref, merge_request_metrics: { first_deployed_to_production_at: nil }) + + merge_requests = + if previous_deployment + query.where("merge_request_metrics.merged_at <= ? AND merge_request_metrics.merged_at >= ?", + self.created_at, + previous_deployment.created_at) + else + query.where("merge_request_metrics.merged_at <= ?", self.created_at) + end + + # Need to use `map` instead of `select` because MySQL doesn't allow `SELECT`ing from the same table + # that we're updating. + MergeRequest::Metrics.where(merge_request_id: merge_requests.map(&:id), first_deployed_to_production_at: nil). + update_all(first_deployed_to_production_at: self.created_at) + end + end + + def previous_deployment + @previous_deployment ||= + project.deployments.joins(:environment). + where(environments: { name: self.environment.name }, ref: self.ref). + where.not(id: self.id). + take + end end diff --git a/app/models/environment.rb b/app/models/environment.rb index 33c9abf382a..49e0a20640c 100644 --- a/app/models/environment.rb +++ b/app/models/environment.rb @@ -43,4 +43,8 @@ class Environment < ActiveRecord::Base last_deployment.includes_commit?(commit) end + + def update_merge_request_metrics? + self.name == "production" + end end diff --git a/app/models/issue.rb b/app/models/issue.rb index 2403748d017..06af94adf5e 100644 --- a/app/models/issue.rb +++ b/app/models/issue.rb @@ -23,9 +23,9 @@ class Issue < ActiveRecord::Base has_many :events, as: :target, dependent: :destroy - has_one :metrics, dependent: :destroy + has_one :metrics - has_many :merge_requests_closing_issues, class_name: MergeRequestsClosingIssues + has_many :merge_requests_closing_issues, class_name: 'MergeRequestsClosingIssues' has_many :closed_by_merge_requests, through: :merge_requests_closing_issues, source: :merge_request validates :project, presence: true @@ -202,7 +202,7 @@ class Issue < ActiveRecord::Base # From all notes on this issue, we'll select the system notes about linked # merge requests. Of those, the MRs closing `self` are returned. def closed_by_merge_requests(current_user = nil) - return [] if !open? + return [] unless open? ext = all_references(current_user) diff --git a/app/models/issue/metrics.rb b/app/models/issue/metrics.rb index 3c1fcea9e6f..4436696cc1a 100644 --- a/app/models/issue/metrics.rb +++ b/app/models/issue/metrics.rb @@ -13,10 +13,6 @@ class Issue::Metrics < ActiveRecord::Base self.save if self.changed? end - def record_commit_mention!(commit_time) - self.update(first_mentioned_in_commit_at: commit_time) if self.first_mentioned_in_commit_at.blank? - end - private def issue_assigned_to_list_label? diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index e7af37a6c0a..2daaaf42da6 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -13,11 +13,11 @@ class MergeRequest < ActiveRecord::Base has_many :merge_request_diffs, dependent: :destroy has_one :merge_request_diff, -> { order('merge_request_diffs.id DESC') } - has_one :metrics, dependent: :destroy + has_one :metrics has_many :events, as: :target, dependent: :destroy - has_many :merge_requests_closing_issues, class_name: MergeRequestsClosingIssues + has_many :merge_requests_closing_issues, class_name: 'MergeRequestsClosingIssues' has_many :issues_closed, through: :merge_requests_closing_issues, source: :issue serialize :merge_params, Hash @@ -513,7 +513,7 @@ class MergeRequest < ActiveRecord::Base # running `ReferenceExtractor` on each of them separately. def cache_merge_request_closes_issues!(current_user = self.author) transaction do - self.merge_requests_closing_issues.destroy_all + self.merge_requests_closing_issues.delete_all closes_issues(current_user).each do |issue| MergeRequestsClosingIssues.create!(merge_request: self, issue: issue) end diff --git a/app/models/merge_request/metrics.rb b/app/models/merge_request/metrics.rb index 8c42c830fac..697a2e303fb 100644 --- a/app/models/merge_request/metrics.rb +++ b/app/models/merge_request/metrics.rb @@ -8,16 +8,4 @@ class MergeRequest::Metrics < ActiveRecord::Base self.save if self.changed? end - - def record_production_deploy!(deploy_time) - self.update(first_deployed_to_production_at: deploy_time) if self.first_deployed_to_production_at.blank? - end - - def record_latest_build_start_time!(start_time) - self.update(latest_build_started_at: start_time, latest_build_finished_at: nil) - end - - def record_latest_build_finish_time!(finish_time) - self.update(latest_build_finished_at: finish_time) - end end diff --git a/app/services/create_deployment_service.rb b/app/services/create_deployment_service.rb index aad5cc8a15b..e958e91d612 100644 --- a/app/services/create_deployment_service.rb +++ b/app/services/create_deployment_service.rb @@ -13,39 +13,13 @@ class CreateDeploymentService < BaseService deployable: deployable ) - update_merge_request_metrics(deployment, environment) + deployment.update_merge_request_metrics deployment end private - def update_merge_request_metrics(deployment, environment) - if environment.name == "production" - query = project.merge_requests.joins("LEFT OUTER JOIN merge_request_metrics ON merge_request_metrics.merge_request_id = merge_requests.id"). - where(target_branch: params[:ref], "merge_request_metrics.first_deployed_to_production_at" => nil) - - previous_deployment = previous_deployment_for_ref(deployment) - merge_requests_deployed_to_production_for_first_time = if previous_deployment - query.where("merge_request_metrics.merged_at < ? AND merge_request_metrics.merged_at > ?", deployment.created_at, previous_deployment.created_at) - else - query.where("merge_request_metrics.merged_at < ?", deployment.created_at) - end - - merge_requests_deployed_to_production_for_first_time.each { |merge_request| merge_request.metrics.record_production_deploy!(deployment.created_at) } - end - end - - def previous_deployment_for_ref(current_deployment) - @previous_deployment_for_ref ||= - project.deployments.joins(:environment). - where("environments.name": params[:environment], ref: params[:ref]). - where.not(id: current_deployment.id). - first - end - - private - def find_or_create_environment project.environments.find_or_create_by(name: expanded_name) do |environment| environment.external_url = expanded_url diff --git a/app/services/git_push_service.rb b/app/services/git_push_service.rb index cde993221ff..c499427605a 100644 --- a/app/services/git_push_service.rb +++ b/app/services/git_push_service.rb @@ -190,6 +190,8 @@ class GitPushService < BaseService def update_issue_metrics(commit, authors) mentioned_issues = commit.all_references(authors[commit]).issues - mentioned_issues.each { |issue| issue.metrics.record_commit_mention!(commit.committed_date) if issue.metrics.present? } + + Issue::Metrics.where(issue_id: mentioned_issues.map(&:id), first_mentioned_in_commit_at: nil). + update_all(first_mentioned_in_commit_at: commit.committed_date) end end diff --git a/db/fixtures/development/17_cycle_analytics.rb b/db/fixtures/development/17_cycle_analytics.rb index f51d28d9ebf..e882a492757 100644 --- a/db/fixtures/development/17_cycle_analytics.rb +++ b/db/fixtures/development/17_cycle_analytics.rb @@ -4,7 +4,7 @@ require './spec/support/test_env' class Gitlab::Seeder::CycleAnalytics def initialize(project, perf: false) @project = project - @user = User.find(1) + @user = User.order(:id).last @issue_count = perf ? 1000 : 5 stub_git_pre_receive! end diff --git a/db/migrate/20160824124900_add_table_issue_metrics.rb b/db/migrate/20160824124900_add_table_issue_metrics.rb index 04dff26043d..3d296c7e884 100644 --- a/db/migrate/20160824124900_add_table_issue_metrics.rb +++ b/db/migrate/20160824124900_add_table_issue_metrics.rb @@ -5,12 +5,12 @@ class AddTableIssueMetrics < ActiveRecord::Migration include Gitlab::Database::MigrationHelpers # Set this constant to true if this migration requires downtime. - DOWNTIME = false + DOWNTIME = true # When a migration requires downtime you **must** uncomment the following # constant and define a short and easy to understand explanation as to why the # migration requires downtime. - # DOWNTIME_REASON = '' + DOWNTIME_REASON = 'Adding foreign key' # When using the methods "add_concurrent_index" or "add_column_with_default" # you must disable the use of transactions as these methods can not run in an @@ -25,7 +25,7 @@ class AddTableIssueMetrics < ActiveRecord::Migration def change create_table :issue_metrics do |t| - t.references :issue, index: { name: "index_issue_metrics" }, foreign_key: true, null: false + t.references :issue, index: { name: "index_issue_metrics" }, foreign_key: true, dependent: :delete, null: false t.datetime 'first_mentioned_in_commit_at' t.datetime 'first_associated_with_milestone_at' diff --git a/db/migrate/20160825052008_add_table_merge_request_metrics.rb b/db/migrate/20160825052008_add_table_merge_request_metrics.rb index fb5494c5e8b..d743134d9a2 100644 --- a/db/migrate/20160825052008_add_table_merge_request_metrics.rb +++ b/db/migrate/20160825052008_add_table_merge_request_metrics.rb @@ -5,12 +5,12 @@ class AddTableMergeRequestMetrics < ActiveRecord::Migration include Gitlab::Database::MigrationHelpers # Set this constant to true if this migration requires downtime. - DOWNTIME = false + DOWNTIME = true # When a migration requires downtime you **must** uncomment the following # constant and define a short and easy to understand explanation as to why the # migration requires downtime. - # DOWNTIME_REASON = '' + DOWNTIME_REASON = 'Adding foreign key' # When using the methods "add_concurrent_index" or "add_column_with_default" # you must disable the use of transactions as these methods can not run in an @@ -25,7 +25,7 @@ class AddTableMergeRequestMetrics < ActiveRecord::Migration def change create_table :merge_request_metrics do |t| - t.references :merge_request, index: { name: "index_merge_request_metrics" }, foreign_key: true, null: false + t.references :merge_request, index: { name: "index_merge_request_metrics" }, foreign_key: true, dependent: :delete, null: false t.datetime 'latest_build_started_at' t.datetime 'latest_build_finished_at' diff --git a/db/migrate/20160915042921_create_merge_requests_closing_issues.rb b/db/migrate/20160915042921_create_merge_requests_closing_issues.rb index 02c28cbe0dd..049340f7085 100644 --- a/db/migrate/20160915042921_create_merge_requests_closing_issues.rb +++ b/db/migrate/20160915042921_create_merge_requests_closing_issues.rb @@ -25,8 +25,8 @@ class CreateMergeRequestsClosingIssues < ActiveRecord::Migration def change create_table :merge_requests_closing_issues do |t| - t.references :merge_request, foreign_key: true, index: true, null: false - t.references :issue, foreign_key: true, index: true, null: false + t.references :merge_request, foreign_key: true, index: true, dependent: :delete, null: false + t.references :issue, foreign_key: true, index: true, dependent: :delete, null: false t.timestamps null: false end diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb new file mode 100644 index 00000000000..3ede0c6acd9 --- /dev/null +++ b/lib/gitlab/database/median.rb @@ -0,0 +1,80 @@ +# https://www.periscopedata.com/blog/medians-in-sql.html +module Gitlab + module Database + module Median + def median_datetime(arel_table, query_so_far, column_sym) + case ActiveRecord::Base.connection.adapter_name + when 'PostgreSQL' + pg_median_datetime(arel_table, query_so_far, column_sym) + when 'Mysql2' + mysql_median_datetime(arel_table, query_so_far, column_sym) + else + raise NotImplementedError, "We haven't implemented a database median strategy for your database type." + end + end + + def mysql_median_datetime(arel_table, query_so_far, column_sym) + query = arel_table. + from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)). + project(average([arel_table[column_sym]], 'median')). + where(Arel::Nodes::Between.new(Arel.sql("(select @row_id := @row_id + 1)"), + Arel::Nodes::And.new([Arel.sql('@ct/2.0'), + Arel.sql('@ct/2.0 + 1')]))). + # Disallow negative values + where(arel_table[column_sym].gteq(0)) + + [Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"), + Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"), + Arel.sql("set @row_id := 0;"), + query, + Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};")] + end + + def pg_median_datetime(arel_table, query_so_far, column_sym) + # Create a CTE with the column we're operating on, row number (after sorting by the column + # we're operating on), and count of the table we're operating on (duplicated across) all rows + # of the CTE. For example, if we're looking to find the median of the `projects.star_count` + # column, the CTE might look like this: + # + # star_count | row_id | ct + # ------------+--------+---- + # 5 | 1 | 3 + # 9 | 2 | 3 + # 15 | 3 | 3 + cte_table = Arel::Table.new("ordered_records") + cte = Arel::Nodes::As.new(cte_table, + arel_table. + project(arel_table[column_sym].as(column_sym.to_s), + Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), + Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), + arel_table.project("COUNT(1)").as('ct')). + # Disallow negative values + where(arel_table[column_sym].gteq(zero_interval))) + + # From the CTE, select either the middle row or the middle two rows (this is accomplished + # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the + # selected rows, and this is the median value. + cte_table.project(average([extract_epoch(cte_table[column_sym])], "median")). + where(Arel::Nodes::Between.new(cte_table[:row_id], + Arel::Nodes::And.new([(cte_table[:ct] / Arel.sql('2.0')), + (cte_table[:ct] / Arel.sql('2.0') + 1)]))). + with(query_so_far, cte) + end + + private + + def average(args, as) + Arel::Nodes::NamedFunction.new("AVG", args, as) + end + + def extract_epoch(arel_attribute) + Arel.sql(%Q{EXTRACT(EPOCH FROM "#{arel_attribute.relation.name}"."#{arel_attribute.name}")}) + end + + # Need to cast '0' to an INTERVAL before we can check if the interval is positive + def zero_interval + Arel::Nodes::NamedFunction.new("CAST", [Arel.sql("'0' AS INTERVAL")]) + end + end + end +end