Merge branch 'fix/remove-import-url-migration' into 'master'
Remove slow migration and add fix to sanitize errors Getting rid of the slow migration and preventing the Error to occur Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/19608 See merge request !5176
This commit is contained in:
commit
11ba19e3d2
|
@ -1,106 +0,0 @@
|
||||||
# Updates project records containing invalid URLs using the AddressableUrlValidator.
|
|
||||||
# This is optimized assuming the number of invalid records is low, but
|
|
||||||
# we still need to loop through all the projects with an +import_url+
|
|
||||||
# so we use batching for the latter.
|
|
||||||
#
|
|
||||||
# This migration is non-reversible as we would have to keep the old data.
|
|
||||||
|
|
||||||
class FixNoValidatableImportUrl < ActiveRecord::Migration
|
|
||||||
include Gitlab::Database::MigrationHelpers
|
|
||||||
class SqlBatches
|
|
||||||
|
|
||||||
attr_reader :results, :query
|
|
||||||
|
|
||||||
def initialize(batch_size: 1000, query:)
|
|
||||||
@offset = 0
|
|
||||||
@batch_size = batch_size
|
|
||||||
@query = query
|
|
||||||
@results = []
|
|
||||||
end
|
|
||||||
|
|
||||||
def next?
|
|
||||||
@results = ActiveRecord::Base.connection.exec_query(batched_sql)
|
|
||||||
@offset += @batch_size
|
|
||||||
@results.any?
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def batched_sql
|
|
||||||
"#{@query} LIMIT #{@batch_size} OFFSET #{@offset}"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# AddressableValidator - Snapshot of AddressableUrlValidator
|
|
||||||
module AddressableUrlValidatorSnap
|
|
||||||
extend self
|
|
||||||
|
|
||||||
def valid_url?(value)
|
|
||||||
return false unless value
|
|
||||||
|
|
||||||
valid_uri?(value) && valid_protocol?(value)
|
|
||||||
rescue Addressable::URI::InvalidURIError
|
|
||||||
false
|
|
||||||
end
|
|
||||||
|
|
||||||
def valid_uri?(value)
|
|
||||||
Addressable::URI.parse(value).is_a?(Addressable::URI)
|
|
||||||
end
|
|
||||||
|
|
||||||
def valid_protocol?(value)
|
|
||||||
value =~ /\A#{URI.regexp(%w(http https ssh git))}\z/
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def up
|
|
||||||
unless defined?(Addressable::URI::InvalidURIError)
|
|
||||||
say('Skipping cleaning up invalid import URLs as class from Addressable is missing')
|
|
||||||
return
|
|
||||||
end
|
|
||||||
|
|
||||||
say('Nullifying empty import URLs')
|
|
||||||
|
|
||||||
nullify_empty_urls
|
|
||||||
|
|
||||||
say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.')
|
|
||||||
|
|
||||||
process_invalid_import_urls
|
|
||||||
end
|
|
||||||
|
|
||||||
def process_invalid_import_urls
|
|
||||||
batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL")
|
|
||||||
|
|
||||||
while batches.next?
|
|
||||||
project_ids = []
|
|
||||||
|
|
||||||
batches.results.each do |result|
|
|
||||||
project_ids << result['id'] unless valid_url?(result['import_url'])
|
|
||||||
end
|
|
||||||
|
|
||||||
process_batch(project_ids)
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
||||||
|
|
||||||
def process_batch(project_ids)
|
|
||||||
Thread.new do
|
|
||||||
begin
|
|
||||||
project_ids.each { |project_id| cleanup_import_url(project_id) }
|
|
||||||
ensure
|
|
||||||
ActiveRecord::Base.connection.close
|
|
||||||
end
|
|
||||||
end.join
|
|
||||||
end
|
|
||||||
|
|
||||||
def valid_url?(url)
|
|
||||||
AddressableUrlValidatorSnap.valid_url?(url)
|
|
||||||
end
|
|
||||||
|
|
||||||
def cleanup_import_url(project_id)
|
|
||||||
execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}")
|
|
||||||
end
|
|
||||||
|
|
||||||
def nullify_empty_urls
|
|
||||||
execute("UPDATE projects SET import_url = NULL WHERE import_url = ''")
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -4,6 +4,8 @@ module Gitlab
|
||||||
regexp = URI::Parser.new.make_regexp(['http', 'https', 'ssh', 'git'])
|
regexp = URI::Parser.new.make_regexp(['http', 'https', 'ssh', 'git'])
|
||||||
|
|
||||||
content.gsub(regexp) { |url| new(url).masked_url }
|
content.gsub(regexp) { |url| new(url).masked_url }
|
||||||
|
rescue Addressable::URI::InvalidURIError
|
||||||
|
content.gsub(regexp, '')
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.valid?(url)
|
def self.valid?(url)
|
||||||
|
|
|
@ -45,6 +45,12 @@ describe Gitlab::UrlSanitizer, lib: true do
|
||||||
|
|
||||||
expect(filtered_content).to include("user@server:project.git")
|
expect(filtered_content).to include("user@server:project.git")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'returns an empty string for invalid URLs' do
|
||||||
|
filtered_content = sanitize_url('ssh://')
|
||||||
|
|
||||||
|
expect(filtered_content).to include("repository '' not found")
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe '#sanitized_url' do
|
describe '#sanitized_url' do
|
||||||
|
|
Loading…
Reference in New Issue