diff --git a/app/models/ci/job_artifact.rb b/app/models/ci/job_artifact.rb index 17b7ee4f07e..32d7cb3424e 100644 --- a/app/models/ci/job_artifact.rb +++ b/app/models/ci/job_artifact.rb @@ -48,6 +48,20 @@ module Ci gzip: 3 } + # `file_location` indicates where actual files are stored. + # Ideally, actual files should be stored in the same directory, and use the same + # convention to generate its path. However, sometimes we can't do so due to backward-compatibility. + # + # legacy_path ... The actual file is stored at a path consists of a timestamp + # and raw project/model IDs. Those rows were migrated from + # `ci_builds.artifacts_file` and `ci_builds.artifacts_metadata` + # hashed_path ... The actual file is stored at a path consists of a SHA2 based on the project ID. + # This is the default value. + enum file_location: { + legacy_path: 1, + hashed_path: 2 + } + FILE_FORMAT_ADAPTERS = { gzip: Gitlab::Ci::Build::Artifacts::GzipFileAdapter }.freeze @@ -72,6 +86,10 @@ module Ci [nil, ::JobArtifactUploader::Store::LOCAL].include?(self.file_store) end + def hashed_path? + super || self.file_location.nil? + end + def expire_in expire_at - Time.now if expire_at end @@ -108,7 +126,7 @@ module Ci end def update_project_statistics_after_destroy - update_project_statistics(-self.size) + update_project_statistics(-self.size.to_i) end def update_project_statistics(difference) diff --git a/app/uploaders/job_artifact_uploader.rb b/app/uploaders/job_artifact_uploader.rb index f6af023e0f9..557b13a8bd6 100644 --- a/app/uploaders/job_artifact_uploader.rb +++ b/app/uploaders/job_artifact_uploader.rb @@ -5,6 +5,7 @@ class JobArtifactUploader < GitlabUploader include ObjectStorage::Concern ObjectNotReadyError = Class.new(StandardError) + UnknownFileLocationError = Class.new(StandardError) storage_options Gitlab.config.artifacts @@ -23,10 +24,22 @@ class JobArtifactUploader < GitlabUploader def dynamic_segment raise ObjectNotReadyError, 'JobArtifact is not ready' unless model.id - creation_date = model.created_at.utc.strftime('%Y_%m_%d') + if model.hashed_path? + hashed_path + elsif model.legacy_path? + legacy_path + else + raise UnknownFileLocationError + end + end + def hashed_path File.join(disk_hash[0..1], disk_hash[2..3], disk_hash, - creation_date, model.job_id.to_s, model.id.to_s) + model.created_at.utc.strftime('%Y_%m_%d'), model.job_id.to_s, model.id.to_s) + end + + def legacy_path + File.join(model.created_at.utc.strftime('%Y_%m'), model.project_id.to_s, model.job_id.to_s) end def disk_hash diff --git a/changelogs/unreleased/add-background-migration-for-legacy-traces.yml b/changelogs/unreleased/add-background-migration-for-legacy-traces.yml new file mode 100644 index 00000000000..3d5a0b4e452 --- /dev/null +++ b/changelogs/unreleased/add-background-migration-for-legacy-traces.yml @@ -0,0 +1,5 @@ +--- +title: Add background migrations for legacy artifacts +merge_request: 18615 +author: +type: performance diff --git a/db/migrate/20180815160409_add_file_location_to_ci_job_artifacts.rb b/db/migrate/20180815160409_add_file_location_to_ci_job_artifacts.rb new file mode 100644 index 00000000000..620342005fe --- /dev/null +++ b/db/migrate/20180815160409_add_file_location_to_ci_job_artifacts.rb @@ -0,0 +1,9 @@ +class AddFileLocationToCiJobArtifacts < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def change + add_column :ci_job_artifacts, :file_location, :integer, limit: 2 + end +end diff --git a/db/migrate/20180815170510_add_partial_index_to_ci_builds_artifacts_file.rb b/db/migrate/20180815170510_add_partial_index_to_ci_builds_artifacts_file.rb new file mode 100644 index 00000000000..5e041ea6559 --- /dev/null +++ b/db/migrate/20180815170510_add_partial_index_to_ci_builds_artifacts_file.rb @@ -0,0 +1,16 @@ +class AddPartialIndexToCiBuildsArtifactsFile < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + INDEX_NAME = 'partial_index_ci_builds_on_id_with_legacy_artifacts'.freeze + + disable_ddl_transaction! + + def up + add_concurrent_index(:ci_builds, :id, where: "artifacts_file <> ''", name: INDEX_NAME) + end + + def down + remove_concurrent_index_by_name(:ci_builds, INDEX_NAME) + end +end diff --git a/db/post_migrate/20180816161409_migrate_legacy_artifacts_to_job_artifacts.rb b/db/post_migrate/20180816161409_migrate_legacy_artifacts_to_job_artifacts.rb new file mode 100644 index 00000000000..2dd711e9c10 --- /dev/null +++ b/db/post_migrate/20180816161409_migrate_legacy_artifacts_to_job_artifacts.rb @@ -0,0 +1,32 @@ +class MigrateLegacyArtifactsToJobArtifacts < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + MIGRATION = 'MigrateLegacyArtifacts'.freeze + BATCH_SIZE = 100 + + disable_ddl_transaction! + + class Build < ActiveRecord::Base + include EachBatch + + self.table_name = 'ci_builds' + self.inheritance_column = :_type_disabled + + scope :with_legacy_artifacts, -> { where("artifacts_file <> ''") } + end + + def up + MigrateLegacyArtifactsToJobArtifacts::Build + .with_legacy_artifacts.tap do |relation| + queue_background_migration_jobs_by_range_at_intervals(relation, + MIGRATION, + 5.minutes, + batch_size: BATCH_SIZE) + end + end + + def down + # no-op + end +end diff --git a/db/schema.rb b/db/schema.rb index 380d4e49ddf..8cfbc1103cb 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -341,6 +341,7 @@ ActiveRecord::Schema.define(version: 20180816193530) do add_index "ci_builds", ["commit_id", "status", "type"], name: "index_ci_builds_on_commit_id_and_status_and_type", using: :btree add_index "ci_builds", ["commit_id", "type", "name", "ref"], name: "index_ci_builds_on_commit_id_and_type_and_name_and_ref", using: :btree add_index "ci_builds", ["commit_id", "type", "ref"], name: "index_ci_builds_on_commit_id_and_type_and_ref", using: :btree + add_index "ci_builds", ["id"], name: "partial_index_ci_builds_on_id_with_legacy_artifacts", where: "(artifacts_file <> ''::text)", using: :btree add_index "ci_builds", ["project_id", "id"], name: "index_ci_builds_on_project_id_and_id", using: :btree add_index "ci_builds", ["protected"], name: "index_ci_builds_on_protected", using: :btree add_index "ci_builds", ["runner_id"], name: "index_ci_builds_on_runner_id", using: :btree @@ -396,6 +397,7 @@ ActiveRecord::Schema.define(version: 20180816193530) do t.string "file" t.binary "file_sha256" t.integer "file_format", limit: 2 + t.integer "file_location", limit: 2 end add_index "ci_job_artifacts", ["expire_at", "job_id"], name: "index_ci_job_artifacts_on_expire_at_and_job_id", using: :btree diff --git a/lib/gitlab/background_migration/migrate_legacy_artifacts.rb b/lib/gitlab/background_migration/migrate_legacy_artifacts.rb new file mode 100644 index 00000000000..5cd638083b0 --- /dev/null +++ b/lib/gitlab/background_migration/migrate_legacy_artifacts.rb @@ -0,0 +1,126 @@ +# frozen_string_literal: true +# rubocop:disable Metrics/ClassLength + +module Gitlab + module BackgroundMigration + ## + # The class to migrate job artifacts from `ci_builds` to `ci_job_artifacts` + class MigrateLegacyArtifacts + FILE_LOCAL_STORE = 1 # equal to ObjectStorage::Store::LOCAL + ARCHIVE_FILE_TYPE = 1 # equal to Ci::JobArtifact.file_types['archive'] + METADATA_FILE_TYPE = 2 # equal to Ci::JobArtifact.file_types['metadata'] + LEGACY_PATH_FILE_LOCATION = 1 # equal to Ci::JobArtifact.file_location['legacy_path'] + + def perform(start_id, stop_id) + ActiveRecord::Base.transaction do + insert_archives(start_id, stop_id) + insert_metadatas(start_id, stop_id) + delete_legacy_artifacts(start_id, stop_id) + end + end + + private + + def insert_archives(start_id, stop_id) + ActiveRecord::Base.connection.execute <<~SQL + INSERT INTO + ci_job_artifacts ( + project_id, + job_id, + expire_at, + file_location, + created_at, + updated_at, + file, + size, + file_store, + file_type + ) + SELECT + project_id, + id, + artifacts_expire_at, + #{LEGACY_PATH_FILE_LOCATION}, + created_at, + created_at, + artifacts_file, + artifacts_size, + COALESCE(artifacts_file_store, #{FILE_LOCAL_STORE}), + #{ARCHIVE_FILE_TYPE} + FROM + ci_builds + WHERE + id BETWEEN #{start_id.to_i} AND #{stop_id.to_i} + AND artifacts_file <> '' + AND NOT EXISTS ( + SELECT + 1 + FROM + ci_job_artifacts + WHERE + ci_builds.id = ci_job_artifacts.job_id + AND ci_job_artifacts.file_type = #{ARCHIVE_FILE_TYPE}) + SQL + end + + def insert_metadatas(start_id, stop_id) + ActiveRecord::Base.connection.execute <<~SQL + INSERT INTO + ci_job_artifacts ( + project_id, + job_id, + expire_at, + file_location, + created_at, + updated_at, + file, + size, + file_store, + file_type + ) + SELECT + project_id, + id, + artifacts_expire_at, + #{LEGACY_PATH_FILE_LOCATION}, + created_at, + created_at, + artifacts_metadata, + NULL, + COALESCE(artifacts_metadata_store, #{FILE_LOCAL_STORE}), + #{METADATA_FILE_TYPE} + FROM + ci_builds + WHERE + id BETWEEN #{start_id.to_i} AND #{stop_id.to_i} + AND artifacts_file <> '' + AND artifacts_metadata <> '' + AND NOT EXISTS ( + SELECT + 1 + FROM + ci_job_artifacts + WHERE + ci_builds.id = ci_job_artifacts.job_id + AND ci_job_artifacts.file_type = #{METADATA_FILE_TYPE}) + SQL + end + + def delete_legacy_artifacts(start_id, stop_id) + ActiveRecord::Base.connection.execute <<~SQL + UPDATE + ci_builds + SET + artifacts_file = NULL, + artifacts_file_store = NULL, + artifacts_size = NULL, + artifacts_metadata = NULL, + artifacts_metadata_store = NULL + WHERE + id BETWEEN #{start_id.to_i} AND #{stop_id.to_i} + AND artifacts_file <> '' + SQL + end + end + end +end diff --git a/spec/factories/ci/job_artifacts.rb b/spec/factories/ci/job_artifacts.rb index 46aaaf6aa5d..f028803ca74 100644 --- a/spec/factories/ci/job_artifacts.rb +++ b/spec/factories/ci/job_artifacts.rb @@ -24,6 +24,12 @@ FactoryBot.define do end end + trait :legacy_archive do + archive + + file_location :legacy_path + end + trait :metadata do file_type :metadata file_format :gzip diff --git a/spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb b/spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb new file mode 100644 index 00000000000..2d1505dacfe --- /dev/null +++ b/spec/lib/gitlab/background_migration/migrate_legacy_artifacts_spec.rb @@ -0,0 +1,156 @@ +require 'spec_helper' + +describe Gitlab::BackgroundMigration::MigrateLegacyArtifacts, :migration, schema: 20180816161409 do + let(:namespaces) { table(:namespaces) } + let(:projects) { table(:projects) } + let(:pipelines) { table(:ci_pipelines) } + let(:jobs) { table(:ci_builds) } + let(:job_artifacts) { table(:ci_job_artifacts) } + + subject { described_class.new.perform(*range) } + + context 'when a pipeline exists' do + let!(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') } + let!(:project) { projects.create!(name: 'gitlab', path: 'gitlab-ce', namespace_id: namespace.id) } + let!(:pipeline) { pipelines.create!(project_id: project.id, ref: 'master', sha: 'adf43c3a') } + + context 'when a legacy artifacts exists' do + let(:artifacts_expire_at) { 1.day.since.to_s } + let(:file_store) { ::ObjectStorage::Store::REMOTE } + + let!(:job) do + jobs.create!( + commit_id: pipeline.id, + project_id: project.id, + status: :success, + **artifacts_archive_attributes, + **artifacts_metadata_attributes) + end + + let(:artifacts_archive_attributes) do + { + artifacts_file: 'archive.zip', + artifacts_file_store: file_store, + artifacts_size: 123, + artifacts_expire_at: artifacts_expire_at + } + end + + let(:artifacts_metadata_attributes) do + { + artifacts_metadata: 'metadata.gz', + artifacts_metadata_store: file_store + } + end + + it 'has legacy artifacts' do + expect(jobs.pluck('artifacts_file, artifacts_file_store, artifacts_size, artifacts_expire_at')).to eq([artifacts_archive_attributes.values]) + expect(jobs.pluck('artifacts_metadata, artifacts_metadata_store')).to eq([artifacts_metadata_attributes.values]) + end + + it 'does not have new artifacts yet' do + expect(job_artifacts.count).to be_zero + end + + context 'when the record exists inside of the range of a background migration' do + let(:range) { [job.id, job.id] } + + it 'migrates a legacy artifact to ci_job_artifacts table' do + expect { subject }.to change { job_artifacts.count }.by(2) + + expect(job_artifacts.order(:id).pluck('project_id, job_id, file_type, file_store, size, expire_at, file, file_sha256, file_location')) + .to eq([[project.id, + job.id, + described_class::ARCHIVE_FILE_TYPE, + file_store, + artifacts_archive_attributes[:artifacts_size], + artifacts_expire_at, + 'archive.zip', + nil, + described_class::LEGACY_PATH_FILE_LOCATION], + [project.id, + job.id, + described_class::METADATA_FILE_TYPE, + file_store, + nil, + artifacts_expire_at, + 'metadata.gz', + nil, + described_class::LEGACY_PATH_FILE_LOCATION]]) + + expect(jobs.pluck('artifacts_file, artifacts_file_store, artifacts_size, artifacts_expire_at')).to eq([[nil, nil, nil, artifacts_expire_at]]) + expect(jobs.pluck('artifacts_metadata, artifacts_metadata_store')).to eq([[nil, nil]]) + end + + context 'when file_store is nil' do + let(:file_store) { nil } + + it 'has nullified file_store in all legacy artifacts' do + expect(jobs.pluck('artifacts_file_store, artifacts_metadata_store')).to eq([[nil, nil]]) + end + + it 'fills file_store by the value of local file store' do + subject + + expect(job_artifacts.pluck('file_store')).to all(eq(::ObjectStorage::Store::LOCAL)) + end + end + + context 'when new artifacts has already existed' do + context 'when only archive.zip existed' do + before do + job_artifacts.create!(project_id: project.id, job_id: job.id, file_type: described_class::ARCHIVE_FILE_TYPE, size: 999, file: 'archive.zip') + end + + it 'had archive.zip already' do + expect(job_artifacts.exists?(job_id: job.id, file_type: described_class::ARCHIVE_FILE_TYPE)).to be_truthy + end + + it 'migrates metadata' do + expect { subject }.to change { job_artifacts.count }.by(1) + + expect(job_artifacts.exists?(job_id: job.id, file_type: described_class::METADATA_FILE_TYPE)).to be_truthy + end + end + + context 'when both archive and metadata existed' do + before do + job_artifacts.create!(project_id: project.id, job_id: job.id, file_type: described_class::ARCHIVE_FILE_TYPE, size: 999, file: 'archive.zip') + job_artifacts.create!(project_id: project.id, job_id: job.id, file_type: described_class::METADATA_FILE_TYPE, size: 999, file: 'metadata.zip') + end + + it 'does not migrate' do + expect { subject }.not_to change { job_artifacts.count } + end + end + end + end + + context 'when the record exists outside of the range of a background migration' do + let(:range) { [job.id + 1, job.id + 1] } + + it 'does not migrate' do + expect { subject }.not_to change { job_artifacts.count } + end + end + end + + context 'when the job does not have legacy artifacts' do + let!(:job) { jobs.create!(commit_id: pipeline.id, project_id: project.id, status: :success) } + + it 'does not have the legacy artifacts in database' do + expect(jobs.count).to eq(1) + expect(jobs.pluck('artifacts_file, artifacts_file_store, artifacts_size, artifacts_expire_at')).to eq([[nil, nil, nil, nil]]) + expect(jobs.pluck('artifacts_metadata, artifacts_metadata_store')).to eq([[nil, nil]]) + end + + context 'when the record exists inside of the range of a background migration' do + let(:range) { [job.id, job.id] } + + it 'does not migrate' do + expect { subject }.not_to change { job_artifacts.count } + end + end + end + end +end diff --git a/spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb b/spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb new file mode 100644 index 00000000000..df82672f254 --- /dev/null +++ b/spec/migrations/migrate_legacy_artifacts_to_job_artifacts_spec.rb @@ -0,0 +1,73 @@ +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20180816161409_migrate_legacy_artifacts_to_job_artifacts.rb') + +describe MigrateLegacyArtifactsToJobArtifacts, :migration, :sidekiq do + let(:migration_class) { Gitlab::BackgroundMigration::MigrateLegacyArtifacts } + let(:migration_name) { migration_class.to_s.demodulize } + + let(:namespaces) { table(:namespaces) } + let(:projects) { table(:projects) } + let(:pipelines) { table(:ci_pipelines) } + let(:jobs) { table(:ci_builds) } + let(:job_artifacts) { table(:ci_job_artifacts) } + let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') } + let(:project) { projects.create!(name: 'gitlab', path: 'gitlab-ce', namespace_id: namespace.id) } + let(:pipeline) { pipelines.create!(project_id: project.id, ref: 'master', sha: 'adf43c3a') } + let(:archive_file_type) { Gitlab::BackgroundMigration::MigrateLegacyArtifacts::ARCHIVE_FILE_TYPE } + let(:metadata_file_type) { Gitlab::BackgroundMigration::MigrateLegacyArtifacts::METADATA_FILE_TYPE } + let(:local_store) { ::ObjectStorage::Store::LOCAL } + let(:remote_store) { ::ObjectStorage::Store::REMOTE } + let(:legacy_location) { Gitlab::BackgroundMigration::MigrateLegacyArtifacts::LEGACY_PATH_FILE_LOCATION } + + context 'when legacy artifacts exist' do + before do + jobs.create!(id: 1, commit_id: pipeline.id, project_id: project.id, status: :success, artifacts_file: 'archive.zip') + jobs.create!(id: 2, commit_id: pipeline.id, project_id: project.id, status: :failed, artifacts_metadata: 'metadata.gz') + jobs.create!(id: 3, commit_id: pipeline.id, project_id: project.id, status: :failed, artifacts_file: 'archive.zip', artifacts_metadata: 'metadata.gz') + jobs.create!(id: 4, commit_id: pipeline.id, project_id: project.id, status: :running) + jobs.create!(id: 5, commit_id: pipeline.id, project_id: project.id, status: :success, artifacts_file: 'archive.zip', artifacts_file_store: remote_store, artifacts_metadata: 'metadata.gz') + jobs.create!(id: 6, commit_id: pipeline.id, project_id: project.id, status: :failed, artifacts_file: 'archive.zip', artifacts_metadata: 'metadata.gz') + end + + it 'schedules a background migration' do + Sidekiq::Testing.fake! do + Timecop.freeze do + migrate! + + expect(migration_name).to be_scheduled_delayed_migration(5.minutes, 1, 6) + expect(BackgroundMigrationWorker.jobs.size).to eq 1 + end + end + end + + it 'migrates legacy artifacts to ci_job_artifacts table' do + migrate! + + expect(job_artifacts.order(:job_id, :file_type).pluck('project_id, job_id, file_type, file_store, size, expire_at, file, file_sha256, file_location')) + .to eq([[project.id, 1, archive_file_type, local_store, nil, nil, 'archive.zip', nil, legacy_location], + [project.id, 3, archive_file_type, local_store, nil, nil, 'archive.zip', nil, legacy_location], + [project.id, 3, metadata_file_type, local_store, nil, nil, 'metadata.gz', nil, legacy_location], + [project.id, 5, archive_file_type, remote_store, nil, nil, 'archive.zip', nil, legacy_location], + [project.id, 5, metadata_file_type, local_store, nil, nil, 'metadata.gz', nil, legacy_location], + [project.id, 6, archive_file_type, local_store, nil, nil, 'archive.zip', nil, legacy_location], + [project.id, 6, metadata_file_type, local_store, nil, nil, 'metadata.gz', nil, legacy_location]]) + end + end + + context 'when legacy artifacts do not exist' do + before do + jobs.create!(id: 1, commit_id: pipeline.id, project_id: project.id, status: :success) + jobs.create!(id: 2, commit_id: pipeline.id, project_id: project.id, status: :failed, artifacts_metadata: 'metadata.gz') + end + + it 'does not schedule background migrations' do + Sidekiq::Testing.fake! do + Timecop.freeze do + migrate! + + expect(BackgroundMigrationWorker.jobs.size).to eq 0 + end + end + end + end +end diff --git a/spec/uploaders/job_artifact_uploader_spec.rb b/spec/uploaders/job_artifact_uploader_spec.rb index 3ad5fe7e3b3..061432f082a 100644 --- a/spec/uploaders/job_artifact_uploader_spec.rb +++ b/spec/uploaders/job_artifact_uploader_spec.rb @@ -40,6 +40,53 @@ describe JobArtifactUploader do it { is_expected.to end_with("ci_build_artifacts.zip") } end + describe '#dynamic_segment' do + let(:uploaded_content) { File.binread(Rails.root + 'spec/fixtures/ci_build_artifacts.zip') } + let(:model) { uploader.model } + + shared_examples_for 'Read file from legacy path' do + it 'store_path returns the legacy path' do + expect(model.file.store_path).to eq(File.join(model.created_at.utc.strftime('%Y_%m'), model.project_id.to_s, model.job_id.to_s, 'ci_build_artifacts.zip')) + end + + it 'has exactly the same content' do + expect(::File.binread(model.file.path)).to eq(uploaded_content) + end + end + + shared_examples_for 'Read file from hashed path' do + it 'store_path returns hashed path' do + expect(model.file.store_path).to eq(File.join(disk_hash[0..1], disk_hash[2..3], disk_hash, creation_date, model.job_id.to_s, model.id.to_s, 'ci_build_artifacts.zip')) + end + + it 'has exactly the same content' do + expect(::File.binread(model.file.path)).to eq(uploaded_content) + end + end + + context 'when a job artifact is stored in legacy_path' do + let(:job_artifact) { create(:ci_job_artifact, :legacy_archive) } + + it_behaves_like 'Read file from legacy path' + end + + context 'when the artifact file is stored in hashed_path' do + let(:job_artifact) { create(:ci_job_artifact, :archive) } + let(:disk_hash) { Digest::SHA2.hexdigest(model.project_id.to_s) } + let(:creation_date) { model.created_at.utc.strftime('%Y_%m_%d') } + + it_behaves_like 'Read file from hashed path' + + context 'when file_location column is empty' do + before do + job_artifact.update_column(:file_location, nil) + end + + it_behaves_like 'Read file from hashed path' + end + end + end + describe "#migrate!" do before do uploader.store!(fixture_file_upload('spec/fixtures/trace/sample_trace'))