Improve performance of stale scheduled builds search

This commit is contained in:
Shinya Maeda 2018-10-03 18:52:18 +09:00
parent 980c0e19d7
commit 5974eff83c
4 changed files with 17 additions and 32 deletions

View file

@ -10,17 +10,16 @@ class StuckCiJobsWorker
BUILD_PENDING_OUTDATED_TIMEOUT = 1.day
BUILD_SCHEDULED_OUTDATED_TIMEOUT = 1.hour
BUILD_PENDING_STUCK_TIMEOUT = 1.hour
BUILD_SCHEDULED_OUTDATED_BATCH_SIZE = 100
def perform
return unless try_obtain_lease
Rails.logger.info "#{self.class}: Cleaning stuck builds"
drop :running, BUILD_RUNNING_OUTDATED_TIMEOUT
drop :pending, BUILD_PENDING_OUTDATED_TIMEOUT
drop_stuck :pending, BUILD_PENDING_STUCK_TIMEOUT
drop_stale_scheduled_builds
drop :running, BUILD_RUNNING_OUTDATED_TIMEOUT, 'ci_builds.updated_at < ?', :stuck_or_timeout_failure
drop :pending, BUILD_PENDING_OUTDATED_TIMEOUT, 'ci_builds.updated_at < ?', :stuck_or_timeout_failure
drop :scheduled, BUILD_SCHEDULED_OUTDATED_TIMEOUT, 'scheduled_at IS NOT NULL AND scheduled_at < ?', :stale_schedule
drop_stuck :pending, BUILD_PENDING_STUCK_TIMEOUT, 'ci_builds.updated_at < ?', :stuck_or_timeout_failure
remove_lease
end
@ -35,25 +34,25 @@ class StuckCiJobsWorker
Gitlab::ExclusiveLease.cancel(EXCLUSIVE_LEASE_KEY, @uuid)
end
def drop(status, timeout)
search(status, timeout) do |build|
drop_build :outdated, build, status, timeout, :stuck_or_timeout_failure
def drop(status, timeout, condition, reason)
search(status, timeout, condition) do |build|
drop_build :outdated, build, status, timeout, reason
end
end
def drop_stuck(status, timeout)
search(status, timeout) do |build|
def drop_stuck(status, timeout, condition, reason)
search(status, timeout, condition) do |build|
break unless build.stuck?
drop_build :stuck, build, status, timeout, :stuck_or_timeout_failure
drop_build :stuck, build, status, timeout, reason
end
end
# rubocop: disable CodeReuse/ActiveRecord
def search(status, timeout)
def search(status, timeout, condition)
loop do
jobs = Ci::Build.where(status: status)
.where('ci_builds.updated_at < ?', timeout.ago)
.where(condition, timeout.ago)
.includes(:tags, :runner, project: :namespace)
.limit(100)
.to_a
@ -64,21 +63,6 @@ class StuckCiJobsWorker
end
end
end
# Drops scheduled builds whose `scheduled_at` is older than
# BUILD_SCHEDULED_OUTDATED_TIMEOUT by passing each one to `drop_build`
# with the :outdated type and the :stale_schedule reason.
def drop_stale_scheduled_builds
# The `ci_builds` table has a partial index on `id` with the condition
# `scheduled_at IS NOT NULL`. The outer query below is therefore intended to
# be served by an index scan, and the more expensive `scheduled_at < ?`
# filter is only applied to one small batch at a time
# (at most BUILD_SCHEDULED_OUTDATED_BATCH_SIZE rows).
#
# `include` returns the receiver, so the chain continues on Ci::Build;
# EachBatch is presumably what provides `each_batch` here (confirm).
Ci::Build.include(EachBatch)
.where('scheduled_at IS NOT NULL')
.each_batch(of: BUILD_SCHEDULED_OUTDATED_BATCH_SIZE) do |relation|
# Narrow the current batch down to builds scheduled before the cutoff.
relation
.where('scheduled_at < ?', BUILD_SCHEDULED_OUTDATED_TIMEOUT.ago)
.find_each(batch_size: BUILD_SCHEDULED_OUTDATED_BATCH_SIZE) do |build|
drop_build(:outdated, build, :scheduled, BUILD_SCHEDULED_OUTDATED_TIMEOUT, :stale_schedule)
end
end
end
# rubocop: enable CodeReuse/ActiveRecord
def drop_build(type, build, status, timeout, reason)

View file

@ -4,12 +4,12 @@ class AddPartialIndexToScheduledAt < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INDEX_NAME = 'partial_index_ci_builds_on_id_with_scheduled_jobs'.freeze
INDEX_NAME = 'partial_index_ci_builds_on_scheduled_at_with_scheduled_jobs'.freeze
disable_ddl_transaction!
def up
add_concurrent_index(:ci_builds, :id, where: "scheduled_at IS NOT NULL", name: INDEX_NAME)
add_concurrent_index(:ci_builds, [:scheduled_at, :id], where: "scheduled_at IS NOT NULL", name: INDEX_NAME)
end
def down

View file

@ -344,10 +344,10 @@ ActiveRecord::Schema.define(version: 20180924201039) do
add_index "ci_builds", ["commit_id", "type", "name", "ref"], name: "index_ci_builds_on_commit_id_and_type_and_name_and_ref", using: :btree
add_index "ci_builds", ["commit_id", "type", "ref"], name: "index_ci_builds_on_commit_id_and_type_and_ref", using: :btree
add_index "ci_builds", ["id"], name: "partial_index_ci_builds_on_id_with_legacy_artifacts", where: "(artifacts_file <> ''::text)", using: :btree
add_index "ci_builds", ["id"], name: "partial_index_ci_builds_on_id_with_scheduled_jobs", where: "(scheduled_at IS NOT NULL)", using: :btree
add_index "ci_builds", ["project_id", "id"], name: "index_ci_builds_on_project_id_and_id", using: :btree
add_index "ci_builds", ["protected"], name: "index_ci_builds_on_protected", using: :btree
add_index "ci_builds", ["runner_id"], name: "index_ci_builds_on_runner_id", using: :btree
add_index "ci_builds", ["scheduled_at", "id"], name: "partial_index_ci_builds_on_scheduled_at_with_scheduled_jobs", where: "(scheduled_at IS NOT NULL)", using: :btree
add_index "ci_builds", ["stage_id", "stage_idx"], name: "tmp_build_stage_position_index", where: "(stage_idx IS NOT NULL)", using: :btree
add_index "ci_builds", ["stage_id"], name: "index_ci_builds_on_stage_id", using: :btree
add_index "ci_builds", ["status", "type", "runner_id"], name: "index_ci_builds_on_status_and_type_and_runner_id", using: :btree
@ -2290,6 +2290,7 @@ ActiveRecord::Schema.define(version: 20180924201039) do
add_foreign_key "boards", "namespaces", column: "group_id", on_delete: :cascade
add_foreign_key "boards", "projects", name: "fk_f15266b5f9", on_delete: :cascade
add_foreign_key "chat_teams", "namespaces", on_delete: :cascade
add_foreign_key "ci_build_schedules", "ci_builds", column: "build_id", on_delete: :cascade
add_foreign_key "ci_build_trace_chunks", "ci_builds", column: "build_id", on_delete: :cascade
add_foreign_key "ci_build_trace_section_names", "projects", on_delete: :cascade
add_foreign_key "ci_build_trace_sections", "ci_build_trace_section_names", column: "section_name_id", name: "fk_264e112c66", on_delete: :cascade

View file

@ -127,7 +127,7 @@ describe StuckCiJobsWorker do
end
end
describe 'drop_stale_scheduled_builds' do
describe 'drop stale scheduled builds' do
let(:status) { 'scheduled' }
let(:updated_at) { }