Add indexes and change SQL for expired artifacts to deal with artifacts migration efficiently

Artifacts are in the middle of being migrated from ci_builds to
ci_job_artifacts. The expiration date is currently visible in both of
these tables and the test for whether an expired artifact is present
for a job is complex as it requires checking both the of the tables.

Add two new indexes, one on ci_builds.artifacts_expire_at and one on
ci_job_artifacts.expire_at to enable finding expired artifacts
efficiently.

And until the migration is finished, replace the SQL for finding
expired and non-expired artifacts with a hand-crafted UNION ALL based
query instead of using OR. This overcomes a database optimizer
limitation that prevents it from using these indexes.

When the migration is finished the next version should remove this
query and replace it with a much simpler query on just
ci_job_artifacts. See
https://gitlab.com/gitlab-org/gitlab-ce/issues/42561 for followup.
This commit is contained in:
Greg Stark 2018-01-19 12:44:27 +00:00
parent dbb934c8e2
commit 271e7a3253
5 changed files with 68 additions and 6 deletions

View File

@ -41,12 +41,44 @@ module Ci
scope :unstarted, ->() { where(runner_id: nil) }
scope :ignore_failures, ->() { where(allow_failure: false) }
# This convoluted mess is because we need to handle two cases of
# artifact files during the migration. And a simple OR clause
# makes it impossible to optimize.
# Instead we want to use UNION ALL and do two carefully
# constructed disjoint queries. But Rails cannot handle UNION or
# UNION ALL queries so we do the query in a subquery and wrap it
# in an otherwise redundant WHERE IN query (IN is fine for
# non-null columns).
# This should all be ripped out when the migration is finished and
# replaced with just the new storage to avoid the extra work.
scope :with_artifacts, ->() do
where('(artifacts_file IS NOT NULL AND artifacts_file <> ?) OR EXISTS (?)',
'', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id'))
old = where(%q[artifacts_file <> ''])
new = where(%q[(artifacts_file IS NULL OR artifacts_file = '') AND EXISTS (?)],
Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id'))
union = Gitlab::SQL::Union.new([old, new], remove_duplicates: false)
Ci::Build.from("(#{union.to_sql}) #{Ci::Build.table_name}")
end
scope :with_artifacts_not_expired, ->() { with_artifacts.where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.now) }
scope :with_expired_artifacts, ->() { with_artifacts.where('artifacts_expire_at < ?', Time.now) }
scope :with_artifacts_not_expired, ->() do
old = where(%q[artifacts_file <> '' AND (artifacts_expire_at IS NULL OR artifacts_expire_at > ?)], Time.now)
new = where(%q[(artifacts_file IS NULL OR artifacts_file = '') AND EXISTS (?)],
Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id AND (expire_at IS NULL OR expire_at > ?)', Time.now))
union = Gitlab::SQL::Union.new([old, new], remove_duplicates: false)
Ci::Build.from("(#{union.to_sql}) #{Ci::Build.table_name}")
end
scope :with_expired_artifacts, ->() do
old = where(%q[artifacts_file <> '' AND artifacts_expire_at < ?], Time.now)
new = where(%q[(artifacts_file IS NULL OR artifacts_file = '') AND EXISTS (?)],
Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id AND expire_at < ?', Time.now))
union = Gitlab::SQL::Union.new([old, new], remove_duplicates: false)
Ci::Build.from("(#{union.to_sql}) #{Ci::Build.table_name}")
end
scope :last_month, ->() { where('created_at > ?', Date.today - 1.month) }
scope :manual_actions, ->() { where(when: :manual, status: COMPLETED_STATUSES + [:manual]) }
scope :ref_protected, -> { where(protected: true) }

View File

@ -0,0 +1,5 @@
---
title: Change SQL for expired artifacts to use new ci_job_artifacts.expire_at
merge_request: 16578
author:
type: performance

View File

@ -0,0 +1,23 @@
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class OptimizeCiJobArtifacts < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
disable_ddl_transaction!
def up
# job_id is just here to be a covering index for index only scans
# since we'll almost always be joining against ci_builds on job_id
add_concurrent_index(:ci_job_artifacts, [:expire_at, :job_id])
add_concurrent_index(:ci_builds, [:artifacts_expire_at], where: "artifacts_file <> ''")
end
def down
remove_concurrent_index(:ci_job_artifacts, [:expire_at, :job_id])
remove_concurrent_index(:ci_builds, [:artifacts_expire_at], where: "artifacts_file <> ''")
end
end

View File

@ -293,6 +293,7 @@ ActiveRecord::Schema.define(version: 20180206200543) do
t.integer "failure_reason"
end
add_index "ci_builds", ["artifacts_expire_at"], name: "index_ci_builds_on_artifacts_expire_at", where: "(artifacts_file <> ''::text)", using: :btree
add_index "ci_builds", ["auto_canceled_by_id"], name: "index_ci_builds_on_auto_canceled_by_id", using: :btree
add_index "ci_builds", ["commit_id", "stage_idx", "created_at"], name: "index_ci_builds_on_commit_id_and_stage_idx_and_created_at", using: :btree
add_index "ci_builds", ["commit_id", "status", "type"], name: "index_ci_builds_on_commit_id_and_status_and_type", using: :btree
@ -333,6 +334,7 @@ ActiveRecord::Schema.define(version: 20180206200543) do
t.string "file"
end
add_index "ci_job_artifacts", ["expire_at", "job_id"], name: "index_ci_job_artifacts_on_expire_at_and_job_id", using: :btree
add_index "ci_job_artifacts", ["job_id", "file_type"], name: "index_ci_job_artifacts_on_job_id_and_file_type", unique: true, using: :btree
add_index "ci_job_artifacts", ["project_id"], name: "index_ci_job_artifacts_on_project_id", using: :btree

View File

@ -180,8 +180,8 @@ FactoryBot.define do
trait :artifacts do
after(:create) do |build|
create(:ci_job_artifact, :archive, job: build)
create(:ci_job_artifact, :metadata, job: build)
create(:ci_job_artifact, :archive, job: build, expire_at: build.artifacts_expire_at)
create(:ci_job_artifact, :metadata, job: build, expire_at: build.artifacts_expire_at)
build.reload
end
end