Filter existing uploads with one query

This commit is contained in:
Michael Kozono 2017-11-23 23:12:24 -08:00
parent 7549d17f72
commit 908aacddda
2 changed files with 127 additions and 151 deletions

View file

@ -50,37 +50,25 @@ module Gitlab
}
].freeze
def ensure_tracked!
add_to_uploads_if_needed
delete
end
def add_to_uploads_if_needed
# Even though we are checking relative paths, path is enough to
# uniquely identify uploads. There is no ambiguity between
# FileUploader paths and other Uploader paths because we use the /-/
# separator kind of like an escape character. Project full_path will
# never conflict with an upload path starting with "uploads/-/".
Upload.
where(path: upload_path).
first_or_create!(
uploader: uploader,
model_type: model_type,
model_id: model_id,
size: file_size
)
def to_h
{
path: upload_path,
uploader: uploader,
model_type: model_type,
model_id: model_id,
size: file_size
}
end
def upload_path
# UntrackedFile#path is absolute, but Upload#path depends on uploader
if uploader == 'FileUploader'
# Path relative to project directory in uploads
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH_PATTERN)
matchd[0].sub(%r{\A/}, '') # remove leading slash
else
path
end
@upload_path ||= if uploader == 'FileUploader'
# Path relative to project directory in uploads
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH_PATTERN)
matchd[0].sub(%r{\A/}, '') # remove leading slash
else
path
end
end
def uploader
@ -187,17 +175,8 @@ module Gitlab
return unless migrate?
files = UntrackedFile.where(id: start_id..end_id)
files.each do |untracked_file|
begin
untracked_file.ensure_tracked!
rescue StandardError => e
Rails.logger.warn "Failed to add untracked file to uploads: #{e.message}"
# The untracked rows will remain in the DB. We will be able to see
# which ones failed to become tracked, and then we can decide what
# to do.
end
end
insert_uploads_if_needed(files)
files.delete_all
drop_temp_table_if_finished
end
@ -208,6 +187,31 @@ module Gitlab
UntrackedFile.table_exists? && Upload.table_exists?
end
def insert_uploads_if_needed(files)
filtered_files = filter_existing_uploads(files)
filtered_files = filter_deleted_models(filtered_files)
insert(filtered_files)
end
def filter_existing_uploads(files)
paths = files.map(&:upload_path)
existing_paths = Upload.where(path: paths).pluck(:path).to_set
files.reject do |file|
existing_paths.include?(file.upload_path)
end
end
def filter_deleted_models(files)
files # TODO
end
def insert(files)
files.each do |file|
Upload.create!(file.to_h)
end
end
def drop_temp_table_if_finished
UntrackedFile.connection.drop_table(:untracked_files_for_uploads) if UntrackedFile.all.empty?
end

View file

@ -126,6 +126,91 @@ describe Gitlab::BackgroundMigration::PopulateUntrackedUploads, :migration, :sid
end.not_to change { uploads.count }.from(0)
end
end
describe 'upload outcomes for each path pattern' do
shared_examples_for 'non_markdown_file' do
let!(:expected_upload_attrs) { model.uploads.first.attributes.slice('path', 'uploader', 'size', 'checksum') }
let!(:untracked_file) { untracked_files_for_uploads.create!(path: expected_upload_attrs['path']) }
before do
model.uploads.delete_all
end
it 'creates an Upload record' do
expect do
subject.perform(1, 1000)
end.to change { model.reload.uploads.count }.from(0).to(1)
expect(model.uploads.first.attributes).to include(expected_upload_attrs)
end
end
context 'for an appearance logo file path' do
let(:model) { create(:appearance, logo: uploaded_file) }
it_behaves_like 'non_markdown_file'
end
context 'for an appearance header_logo file path' do
let(:model) { create(:appearance, header_logo: uploaded_file) }
it_behaves_like 'non_markdown_file'
end
context 'for a pre-Markdown Note attachment file path' do
class Note < ActiveRecord::Base
has_many :uploads, as: :model, dependent: :destroy
end
let(:model) { create(:note, :with_attachment) }
it_behaves_like 'non_markdown_file'
end
context 'for a user avatar file path' do
let(:model) { create(:user, :with_avatar) }
it_behaves_like 'non_markdown_file'
end
context 'for a group avatar file path' do
let(:model) { create(:group, :with_avatar) }
it_behaves_like 'non_markdown_file'
end
context 'for a project avatar file path' do
let(:model) { create(:project, :with_avatar) }
it_behaves_like 'non_markdown_file'
end
context 'for a project Markdown attachment (notes, issues, MR descriptions) file path' do
let(:model) { create(:project) }
before do
# Upload the file
UploadService.new(model, uploaded_file, FileUploader).execute
# Create the untracked_files_for_uploads record
untracked_files_for_uploads.create!(path: "#{Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR}/#{model.full_path}/#{model.uploads.first.path}")
# Save the expected upload attributes
@expected_upload_attrs = model.reload.uploads.first.attributes.slice('path', 'uploader', 'size', 'checksum')
# Untrack the file
model.reload.uploads.delete_all
end
it 'creates an Upload record' do
expect do
subject.perform(1, 1000)
end.to change { model.reload.uploads.count }.from(0).to(1)
expect(model.uploads.first.attributes).to include(@expected_upload_attrs)
end
end
end
end
describe Gitlab::BackgroundMigration::PopulateUntrackedUploads::UntrackedFile do
@ -141,119 +226,6 @@ describe Gitlab::BackgroundMigration::PopulateUntrackedUploads::UntrackedFile do
drop_temp_table_if_exists
end
describe '#ensure_tracked!' do
let!(:user1) { create(:user, :with_avatar) }
let!(:untracked_file) { described_class.create!(path: user1.uploads.first.path) }
context 'when the file is already in the uploads table' do
it 'does not add an upload' do
expect do
untracked_file.ensure_tracked!
end.not_to change { upload_class.count }.from(1)
end
end
context 'when the file is not already in the uploads table' do
before do
user1.uploads.delete_all
end
it 'adds an upload' do
expect do
untracked_file.ensure_tracked!
end.to change { upload_class.count }.from(0).to(1)
end
end
end
describe '#add_to_uploads_if_needed' do
shared_examples_for 'add_to_uploads_non_markdown_files' do
let!(:expected_upload_attrs) { model.uploads.first.attributes.slice('path', 'uploader', 'size', 'checksum') }
let!(:untracked_file) { described_class.create!(path: expected_upload_attrs['path']) }
before do
model.uploads.delete_all
end
it 'creates an Upload record' do
expect do
untracked_file.add_to_uploads_if_needed
end.to change { model.reload.uploads.count }.from(0).to(1)
expect(model.uploads.first.attributes).to include(expected_upload_attrs)
end
end
context 'for an appearance logo file path' do
let(:model) { create(:appearance, logo: uploaded_file) }
it_behaves_like 'add_to_uploads_non_markdown_files'
end
context 'for an appearance header_logo file path' do
let(:model) { create(:appearance, header_logo: uploaded_file) }
it_behaves_like 'add_to_uploads_non_markdown_files'
end
context 'for a pre-Markdown Note attachment file path' do
class Note < ActiveRecord::Base
has_many :uploads, as: :model, dependent: :destroy
end
let(:model) { create(:note, :with_attachment) }
it_behaves_like 'add_to_uploads_non_markdown_files'
end
context 'for a user avatar file path' do
let(:model) { create(:user, :with_avatar) }
it_behaves_like 'add_to_uploads_non_markdown_files'
end
context 'for a group avatar file path' do
let(:model) { create(:group, :with_avatar) }
it_behaves_like 'add_to_uploads_non_markdown_files'
end
context 'for a project avatar file path' do
let(:model) { create(:project, :with_avatar) }
it_behaves_like 'add_to_uploads_non_markdown_files'
end
context 'for a project Markdown attachment (notes, issues, MR descriptions) file path' do
let(:model) { create(:project) }
# UntrackedFile.path is different than Upload.path
let(:untracked_file) { create_untracked_file("/#{model.full_path}/#{model.uploads.first.path}") }
before do
# Upload the file
UploadService.new(model, uploaded_file, FileUploader).execute
# Create the untracked_files_for_uploads record
untracked_file
# Save the expected upload attributes
@expected_upload_attrs = model.reload.uploads.first.attributes.slice('path', 'uploader', 'size', 'checksum')
# Untrack the file
model.reload.uploads.delete_all
end
it 'creates an Upload record' do
expect do
untracked_file.add_to_uploads_if_needed
end.to change { model.reload.uploads.count }.from(0).to(1)
expect(model.uploads.first.attributes).to include(@expected_upload_attrs)
end
end
end
describe '#upload_path' do
def assert_upload_path(file_path, expected_upload_path)
untracked_file = create_untracked_file(file_path)