2017-11-06 16:44:30 -05:00
|
|
|
module Gitlab
|
|
|
|
module BackgroundMigration
|
2017-11-09 20:17:56 -05:00
|
|
|
class PrepareUntrackedUploads
|
2017-11-07 15:53:24 -05:00
|
|
|
# For bulk_queue_background_migration_jobs_by_range
include Database::MigrationHelpers

# Number of file paths handed to each insert batch.
FILE_PATH_BATCH_SIZE = 500

# Upload directory, relative to CarrierWave.root and as an absolute path.
RELATIVE_UPLOAD_DIR = "uploads".freeze
ABSOLUTE_UPLOAD_DIR = "#{CarrierWave.root}/#{RELATIVE_UPLOAD_DIR}".freeze

# Name of the background migration scheduled once the paths are stored.
FOLLOW_UP_MIGRATION = 'PopulateUntrackedUploads'.freeze

# Matches the leading "<CarrierWave.root>/" of an absolute path so it can be
# stripped. The root is escaped so that regex metacharacters in the directory
# name (".", "+", "(", ...) are matched literally rather than as a pattern.
START_WITH_CARRIERWAVE_ROOT_REGEX = %r{\A#{Regexp.escape(CarrierWave.root.to_s)}/}

# `find -path` patterns for directories that are excluded from the scan.
EXCLUDED_HASHED_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/@hashed/*".freeze
EXCLUDED_TMP_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/tmp/*".freeze
|
2017-11-06 20:07:35 -05:00
|
|
|
|
2017-11-09 20:17:56 -05:00
|
|
|
# Lightweight model over the `untracked_files_for_uploads` table, declared
# inside the migration so it does not depend on an application model.
class UntrackedFile < ActiveRecord::Base
  # Batch iteration helpers.
  include EachBatch

  # The table name is set explicitly rather than inferred from the class name.
  self.table_name = 'untracked_files_for_uploads'
end
|
|
|
|
|
|
|
|
# Entry point of the migration: stores the path of every file found under
# the upload directory, then schedules the follow-up migration jobs.
# Does nothing (and returns nil) when `migrate?` is false.
def perform
  if migrate?
    store_untracked_file_paths
    schedule_populate_untracked_uploads_jobs
  end
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
# The migration can only run when the table backing UntrackedFile exists.
def migrate?
  UntrackedFile.table_exists?
end
|
|
|
|
|
2017-11-09 20:17:56 -05:00
|
|
|
# Walks the absolute upload directory in batches of FILE_PATH_BATCH_SIZE
# and inserts each batch of paths. Returns early when the directory does
# not exist.
def store_untracked_file_paths
  upload_dir = ABSOLUTE_UPLOAD_DIR
  return unless Dir.exist?(upload_dir)

  each_file_batch(upload_dir, FILE_PATH_BATCH_SIZE) { |batch| insert_file_paths(batch) }
end
|
|
|
|
|
2017-11-08 18:05:08 -05:00
|
|
|
# Runs the find command for `search_dir` and passes its output to the given
# block in batches of `batch_size` paths.
#
# Raises a RuntimeError ("Find command failed") when the command exits
# unsuccessfully; the check happens after all output has been consumed.
def each_file_batch(search_dir, batch_size, &block)
  find_command = build_find_command(search_dir)

  Open3.popen2(*find_command) do |_stdin, find_output, waiter|
    yield_paths_in_batches(find_output, batch_size, &block)

    # `waiter.value` blocks until the child process has exited.
    exit_status = waiter.value
    raise "Find command failed" unless exit_status.success?
  end
end
|
|
|
|
|
2017-11-08 18:05:08 -05:00
|
|
|
# Reads NUL-separated paths from `stdout`, strips the leading
# CarrierWave root from each one, and yields them to the block in arrays
# of at most `batch_size` entries.
#
# stdout     - IO-like object responding to `each_line(separator)`.
# batch_size - Maximum number of paths per yielded array.
#
# A trailing partial batch is yielded only when it contains paths, so the
# block never receives an empty array (no files at all, or a file count
# that is an exact multiple of `batch_size`).
def yield_paths_in_batches(stdout, batch_size, &block)
  paths = []

  stdout.each_line("\0") do |line|
    paths << line.chomp("\0").sub(START_WITH_CARRIERWAVE_ROOT_REGEX, '')

    if paths.size >= batch_size
      yield(paths)
      paths = []
    end
  end

  yield(paths) unless paths.empty?
end
|
|
|
|
|
2017-11-06 20:07:35 -05:00
|
|
|
# Builds the argv array for a `find` invocation that prints, NUL-separated,
# every regular file under `search_dir` except those matching the excluded
# hashed and tmp upload paths. The command is prefixed with
# `ionice -c Idle` when ionice is available.
def build_find_command(search_dir)
  find_args = [
    'find', search_dir.to_s, '-type', 'f',
    '!', '(', '-path', EXCLUDED_HASHED_UPLOADS_PATH, '-prune', ')',
    '!', '(', '-path', EXCLUDED_TMP_UPLOADS_PATH, '-prune', ')',
    '-print0'
  ]

  ionice_is_available? ? ['ionice', '-c', 'Idle', *find_args] : find_args
end
|
|
|
|
|
|
|
|
# Truthy when `Gitlab::Utils.which` finds `ionice`.
def ionice_is_available?
  ionice_path = Gitlab::Utils.which('ionice')
  ionice_path
rescue StandardError
  # Treating a failed lookup as "not available" is relatively safe, even
  # though it isn't very nice: the find command then simply runs without ionice.
  false
end
|
|
|
|
|
|
|
|
# Inserts every path of the batch inside a single transaction.
#
# file_paths - Array of path strings.
#
# Returns `file_paths`. An empty batch returns immediately, so no
# transaction is opened when there is nothing to insert.
def insert_file_paths(file_paths)
  return file_paths if file_paths.empty?

  ActiveRecord::Base.transaction do
    file_paths.each do |file_path|
      insert_file_path(file_path)
    end
  end
end
|
|
|
|
|
2017-11-08 15:31:51 -05:00
|
|
|
# Records a single path in untracked_files_for_uploads, using a
# conflict-ignoring INSERT where one is available.
def insert_file_path(file_path)
  # No easy way to do ON CONFLICT DO NOTHING before Postgres 9.5, so just use Rails.
  return UntrackedFile.where(path: file_path).first_or_create if postgresql_pre_9_5?

  target = 'untracked_files_for_uploads (path, created_at, updated_at) VALUES (?, ?, ?)'

  # PostgreSQL gets ON CONFLICT DO NOTHING; any other database gets INSERT IGNORE.
  template = postgresql? ? "INSERT INTO #{target} ON CONFLICT DO NOTHING;" : "INSERT IGNORE INTO #{target};"

  # The same timestamp fills both created_at and updated_at.
  now = Time.now.utc.iso8601
  statement = ActiveRecord::Base.send(:sanitize_sql_array, [template, file_path, now, now]) # rubocop:disable GitlabSecurity/PublicSend

  ActiveRecord::Base.connection.execute(statement)
end
|
|
|
|
|
2017-11-15 07:51:28 -05:00
|
|
|
# Memoized result of Gitlab::Database.postgresql?.
#
# Uses `defined?` rather than `||=` so that a false result is cached too;
# `||=` would repeat the check on every call whenever the answer is false.
def postgresql?
  return @postgresql if defined?(@postgresql)

  @postgresql = Gitlab::Database.postgresql?
end
|
|
|
|
|
|
|
|
# True when the database is PostgreSQL and its server_version_num is
# below 90500 (i.e. older than 9.5).
#
# Memoized with `defined?` rather than `||=`: the result is false on every
# PostgreSQL >= 9.5 server, and `||=` would then re-run the
# `SHOW server_version_num` query on each call.
def postgresql_pre_9_5?
  return @postgresql_pre_9_5 if defined?(@postgresql_pre_9_5)

  @postgresql_pre_9_5 = postgresql? &&
    ActiveRecord::Base.connection.select_value('SHOW server_version_num').to_i < 90500
end
|
|
|
|
|
2017-11-06 16:44:30 -05:00
|
|
|
# Queues FOLLOW_UP_MIGRATION jobs over the UntrackedFile records via
# bulk_queue_background_migration_jobs_by_range.
def schedule_populate_untracked_uploads_jobs
  bulk_queue_background_migration_jobs_by_range(UntrackedFile, FOLLOW_UP_MIGRATION)
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|