2017-11-27 01:57:21 -05:00
# frozen_string_literal: true
2017-11-07 15:53:24 -05:00
module Gitlab
module BackgroundMigration
2017-11-27 01:57:21 -05:00
# This class processes a batch of rows in `untracked_files_for_uploads` by
# adding each file to the `uploads` table if it does not exist.
2018-07-02 06:43:06 -04:00
class PopulateUntrackedUploads
2017-11-07 15:53:24 -05:00
# Entry point of the background migration for one batch.
#
# start_id, end_id - inclusive bounds of the `UntrackedFile` IDs to process.
#
# Does nothing unless `migrate?` is truthy. Otherwise the rows in the ID range
# are handed to `insert_uploads_if_needed`, the rows it reports as processed
# are deleted, and finally the temporary table is dropped if nothing is left.
def perform(start_id, end_id)
  return unless migrate?

  untracked_file = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile
  batch = untracked_file.where(id: start_id..end_id)

  # Only the rows reported back as processed are removed from the table.
  insert_uploads_if_needed(batch).delete_all

  drop_temp_table_if_finished
end
private
# Whether both tables this migration works with exist: the table behind
# `UntrackedFile` and the table behind `Upload`.
#
# The second check is only evaluated when the first one is truthy.
def migrate?
  dependencies = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies

  untracked_table_present = dependencies::UntrackedFile.table_exists?
  untracked_table_present && dependencies::Upload.table_exists?
end
2017-11-15 05:36:25 -05:00
2017-11-24 02:12:24 -05:00
# Inserts `uploads` rows for the given untracked files where needed.
#
# files - a relation-like collection of untracked files; it must support
#         `where.not(id: ...)` in addition to being enumerable.
#
# Files that fail to parse are split off first. The rest are narrowed to
# files that have no upload yet and whose model still exists, and those are
# inserted.
#
# Returns `files` restricted to the rows that did not fail to parse. The
# caller (`perform`) deletes exactly these rows, so failing rows stay behind.
def insert_uploads_if_needed(files)
  parseable, unparseable = filter_error_files(files)

  insertable = filter_deleted_models(filter_existing_uploads(parseable))
  insert(insertable)

  files.where.not(id: unparseable.map(&:id))
end
# Splits `files` into two arrays: `[parseable, failing]`.
#
# A file counts as parseable when calling `to_h` on it does not raise a
# StandardError. For every failing file the path, the error message and the
# backtrace are written to the Rails error log; the error itself is swallowed
# so that one bad file does not abort the whole batch.
def filter_error_files(files)
  files.partition do |file|
    begin
      file.to_h
    rescue => e
      Rails.logger.error(<<~MSG)
        Error parsing path "#{file.path}":
        #{e.message}
        #{e.backtrace.join("\n ")}
      MSG

      false
    else
      true
    end
  end
end
# Drops every file whose `upload_path` is already present in the `path`
# column of `Upload`.
#
# files - enumerable of objects responding to `upload_path`.
#
# Runs a single lookup for all candidate paths and returns the files whose
# path was not found.
def filter_existing_uploads(files)
  candidate_paths = files.map(&:upload_path)

  upload_model = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Upload
  tracked_paths = Set.new(upload_model.where(path: candidate_paths).pluck(:path))

  files.reject { |file| tracked_paths.member?(file.upload_path) }
end
2017-11-24 03:49:04 -05:00
# Some files on disk have no row in the uploads table because the model they
# belonged to was deleted, and files on disk are not deleted along with it.
#
# Drops every file whose `model_id` is reported by `deleted_model_ids` as
# deleted for the file's `model_type`, and returns the remaining files.
def filter_deleted_models(files)
  deleted_ids_by_type = deleted_model_ids(files)

  files.reject do |file|
    deleted_for_type = deleted_ids_by_type[file.model_type]
    deleted_for_type.include?(file.model_id)
  end
end
# Finds, per model type, the model IDs referenced by `files` that no longer
# have a matching row.
#
# files - enumerable of objects responding to `model_type` (one of the five
#         type names below) and `model_id`.
#
# Returns a Hash with exactly the keys 'Appearance', 'Namespace', 'Note',
# 'Project' and 'User'. Each value is the array of referenced IDs for which
# the corresponding dependency model returned no row (empty when the type is
# not referenced at all).
def deleted_model_ids(files)
  ids_by_type = {
    'Appearance' => [],
    'Namespace' => [],
    'Note' => [],
    'Project' => [],
    'User' => []
  }

  # Group the referenced model IDs by model type.
  files.each do |file|
    ids_by_type[file.model_type] << file.model_id
  end

  # Build a fresh result instead of reassigning values of the hash that is
  # being iterated.
  ids_by_type.each_with_object({}) do |(model_type, model_ids), deleted|
    deleted[model_type] =
      if model_ids.empty?
        # Nothing references this type, so nothing can be missing: skip the
        # class lookup and the query altogether.
        []
      else
        model_class = "Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::#{model_type}".constantize
        found_ids = model_class.where(id: model_ids.uniq).pluck(:id)
        model_ids - found_ids
      end
  end
end
# Bulk-inserts one row per file into the `uploads` table.
#
# files - enumerable of objects responding to `to_h`; the resulting hash is
#         used as the row, with `created_at` added.
#
# `created_at` is set to the literal text NOW() and listed under
# `disable_quote`, so it is handed to `bulk_insert` as an unquoted value.
def insert(files)
  timestamp = { created_at: 'NOW()' }
  rows = files.map { |file| file.to_h.merge(timestamp) }

  Gitlab::Database.bulk_insert('uploads', rows, disable_quote: :created_at)
end
2017-11-15 05:36:25 -05:00
# Drops the `untracked_files_for_uploads` table once `UntrackedFile` has no
# rows left.
#
# The drop is skipped in the test environment. Returns nil when nothing is
# dropped.
def drop_temp_table_if_finished
  untracked_file = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile

  return unless untracked_file.all.empty?
  # Dropping a table intermittently breaks test cleanup
  return if Rails.env.test?

  untracked_file.connection.drop_table(:untracked_files_for_uploads, if_exists: true)
end
2017-11-07 15:53:24 -05:00
end
end
end