2018-11-05 23:45:35 -05:00
# frozen_string_literal: true
2016-05-09 08:14:32 -04:00
module Gitlab
module Database
module MigrationHelpers
2017-09-08 16:10:53 -04:00
# Number of rows to process per background migration job.
BACKGROUND_MIGRATION_BATCH_SIZE = 1_000
# Number of background migration jobs to bulk queue at a time.
BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1_000

# Column names accepted by the `:columns` option of `add_timestamps_with_timezone`.
PERMITTED_TIMESTAMP_COLUMNS = Set[:created_at, :updated_at, :deleted_at].freeze
# Columns used when no `:columns` option is given.
DEFAULT_TIMESTAMP_COLUMNS = [:created_at, :updated_at].freeze
2017-06-13 07:44:13 -04:00
# Adds `created_at` and `updated_at` columns with timezone information.
#
# This method is an improved version of Rails' built-in method `add_timestamps`.
#
2019-07-18 11:47:01 -04:00
# By default, adds `created_at` and `updated_at` columns, but these can be specified as:
#
# add_timestamps_with_timezone(:my_table, columns: [:created_at, :deleted_at])
#
# This allows you to create just the timestamps you need, saving space.
#
2017-06-13 07:44:13 -04:00
# Available options are:
2019-07-18 11:47:01 -04:00
# :default - The default value for the column.
# :null - When set to `true` the column will allow NULL values.
2017-06-13 07:44:13 -04:00
# The default is to not allow NULL values.
2019-07-18 11:47:01 -04:00
# :columns - the column names to create. Must be one
# of `Gitlab::Database::MigrationHelpers::PERMITTED_TIMESTAMP_COLUMNS`.
# Default value: `DEFAULT_TIMESTAMP_COLUMNS`
#
# All options are optional.
2017-06-13 07:44:13 -04:00
def add_timestamps_with_timezone(table_name, options = {})
  # Apply the `null: false` default on a copy: the caller's hash is never
  # mutated, so a frozen or shared options hash can be passed in safely.
  options = options.merge(null: false) if options[:null].nil?

  columns = options.fetch(:columns, DEFAULT_TIMESTAMP_COLUMNS)
  default_value = options[:default]

  # A default value is applied through `add_column_with_default`, which
  # refuses to run inside a transaction, so check once up front.
  validate_not_in_transaction!(:add_timestamps_with_timezone, 'with default value') if default_value

  columns.each do |column_name|
    validate_timestamp_column_name!(column_name)

    # If default value is presented, use `add_column_with_default` method instead.
    if default_value
      add_column_with_default(
        table_name,
        column_name,
        :datetime_with_timezone,
        default: default_value,
        allow_null: options[:null]
      )
    else
      add_column(table_name, column_name, :datetime_with_timezone, options)
    end
  end
end
2019-07-18 11:47:01 -04:00
# To be used in the `#down` method of migrations that
# use `#add_timestamps_with_timezone`.
#
# Available options are:
# :columns - the column names to remove.
# Default value: `DEFAULT_TIMESTAMP_COLUMNS`
#
# All options are optional.
def remove_timestamps(table_name, options = {})
  # Fall back to the default timestamp columns when `:columns` is not given.
  column_names = options.fetch(:columns) { DEFAULT_TIMESTAMP_COLUMNS }

  column_names.each { |name| remove_column(table_name, name) }
end
2019-06-13 09:12:28 -04:00
# Creates a new index, concurrently
2016-05-09 08:14:32 -04:00
#
# Example:
#
# add_concurrent_index :users, :some_column
#
# See Rails' `add_index` for more info on the available arguments.
2016-06-06 10:30:17 -04:00
def add_concurrent_index(table_name, column_name, options = {})
  # Refuse to run when a transaction is open.
  if transaction_open?
    message = 'add_concurrent_index can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'

    raise message
  end

  index_options = options.merge(algorithm: :concurrently)

  # Skip creation when a matching index is already present.
  if index_exists?(table_name, column_name, index_options)
    Rails.logger.warn "Index not created because it already exists (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}" # rubocop:disable Gitlab/RailsLogger
    return
  end

  disable_statement_timeout { add_index(table_name, column_name, index_options) }
end
2019-06-13 09:12:28 -04:00
# Removes an existing index, concurrently
2017-04-05 18:53:57 -04:00
#
# Example:
#
# remove_concurrent_index :users, :some_column
#
# See Rails' `remove_index` for more info on the available arguments.
def remove_concurrent_index(table_name, column_name, options = {})
  if transaction_open?
    message = 'remove_concurrent_index can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'

    raise message
  end

  # Only request a concurrent drop when the server supports it.
  options = options.merge(algorithm: :concurrently) if supports_drop_index_concurrently?

  if index_exists?(table_name, column_name, options)
    disable_statement_timeout do
      remove_index(table_name, options.merge(column: column_name))
    end
  else
    Rails.logger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}" # rubocop:disable Gitlab/RailsLogger
    nil
  end
end
2019-06-13 09:12:28 -04:00
# Removes an existing index, concurrently
2017-05-25 07:10:34 -04:00
#
# Example:
#
# remove_concurrent_index_by_name :users, "index_X_by_Y"
#
# See Rails' `remove_index` for more info on the available arguments.
def remove_concurrent_index_by_name(table_name, index_name, options = {})
  if transaction_open?
    message = 'remove_concurrent_index_by_name can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'

    raise message
  end

  # Only request a concurrent drop when the server supports it.
  options = options.merge(algorithm: :concurrently) if supports_drop_index_concurrently?

  if index_exists_by_name?(table_name, index_name)
    disable_statement_timeout do
      remove_index(table_name, options.merge(name: index_name))
    end
  else
    Rails.logger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, index_name: #{index_name}" # rubocop:disable Gitlab/RailsLogger
    nil
  end
end
# Only available on Postgresql >= 9.2
def supports_drop_index_concurrently?
  # `server_version_num` is the numeric server version; 90200 is 9.2.
  row = select_one("SELECT current_setting('server_version_num') AS v")

  row['v'].to_i >= 90200
end
2017-02-08 12:15:47 -05:00
# Adds a foreign key with only minimal locking on the tables involved.
#
2019-06-13 09:12:28 -04:00
# This method only requires minimal locking
2017-02-08 12:15:47 -05:00
#
# source - The source table containing the foreign key.
# target - The target table the key points to.
# column - The name of the column to create the foreign key on.
# on_delete - The action to perform when associated data is removed,
# defaults to "CASCADE".
2019-07-10 15:26:47 -04:00
#
# rubocop:disable Gitlab/RailsLogger
2019-04-11 10:29:44 -04:00
def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, name: nil)
  # Transactions would result in ALTER TABLE locks being held for the
  # duration of the transaction, defeating the purpose of this method.
  if transaction_open?
    raise 'add_concurrent_foreign_key can not be run inside a transaction'
  end

  # `:nullify` is translated to the SQL action name; any other value is
  # upcased and interpolated as-is below.
  on_delete = 'SET NULL' if on_delete == :nullify

  # An explicit `name:` wins; otherwise a hashed name is generated.
  key_name = name || concurrent_foreign_key_name(source, column)

  if foreign_key_exists?(source, target, column: column)
    # The key is already there (e.g. a previous run was aborted after the
    # ADD CONSTRAINT step): skip creation and only validate below.
    Rails.logger.warn "Foreign key not created because it exists already " \
      "(this may be due to an aborted migration or similar): " \
      "source: #{source}, target: #{target}, column: #{column}"
  else
    # Using NOT VALID allows us to create a key without immediately
    # validating it. This means we keep the ALTER TABLE lock only for a
    # short period of time. The key _is_ enforced for any newly created
    # data.
    execute <<-EOF.strip_heredoc
    ALTER TABLE #{source}
    ADD CONSTRAINT #{key_name}
    FOREIGN KEY (#{column})
    REFERENCES #{target} (id)
    #{on_delete ? "ON DELETE #{on_delete.upcase}" : ''}
    NOT VALID;
    EOF
  end

  # Validate the existing constraint. This can potentially take a very
  # long time to complete, but fortunately does not lock the source table
  # while running.
  #
  # Note this is a no-op in case the constraint is VALID already
  disable_statement_timeout do
    execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{key_name};")
  end
end
2019-07-10 15:26:47 -04:00
# rubocop:enable Gitlab/RailsLogger
2017-02-08 12:15:47 -05:00
2018-03-20 10:50:07 -04:00
def foreign_key_exists?(source, target = nil, column: nil)
  foreign_keys(source).any? do |foreign_key|
    # A given column takes precedence; the target table is only compared
    # when no column was supplied.
    next foreign_key.options[:column].to_s == column.to_s if column

    foreign_key.to_table.to_s == target.to_s
  end
end
2017-02-21 09:07:02 -05:00
# Returns the name for a concurrent foreign key.
#
# PostgreSQL constraint names have a limit of 63 bytes. The logic used
# here is based on Rails' foreign_key_name() method, which unfortunately
# is private so we can't rely on it directly.
def concurrent_foreign_key_name(table, column)
  digest = Digest::SHA256.hexdigest("#{table}_#{column}_fk")

  # Keep only the first 10 hex characters of the digest.
  "fk_#{digest[0, 10]}"
end
2016-07-14 21:39:08 -04:00
# Long-running migrations may take more than the timeout allowed by
# the database. Disable the session's statement timeout to ensure
2019-06-13 09:12:28 -04:00
# migrations don't get killed prematurely.
2018-07-09 11:34:50 -04:00
#
# There are two possible ways to disable the statement timeout:
#
# - Per transaction (this is the preferred and default mode)
# - Per connection (requires a cleanup after the execution)
#
2018-08-10 19:45:46 -04:00
# When using a per connection disable statement, code must be inside
# a block so we can automatically execute `RESET ALL` after block finishes
# otherwise the statement will still be disabled until connection is dropped
# or `RESET ALL` is executed
def disable_statement_timeout
  unless block_given?
    # Per-transaction mode: SET LOCAL is used, so a transaction must be open.
    unless transaction_open?
      raise <<~ERROR
        Cannot call disable_statement_timeout() without a transaction open or outside of a transaction block.
        If you don't want to use a transaction wrap your code in a block call:

        disable_statement_timeout { # code that requires disabled statement here }

        This will make sure statement_timeout is disabled before and reset after the block execution is finished.
      ERROR
    end

    return execute('SET LOCAL statement_timeout TO 0')
  end

  # Per-connection mode: disable the timeout, run the block, and always
  # issue RESET ALL afterwards, even when the block raises.
  begin
    execute('SET statement_timeout TO 0')

    yield
  ensure
    execute('RESET ALL')
  end
end
2017-04-10 06:23:28 -04:00
def true_value
  # Delegates to `Database.true_value`.
  Database.true_value
end
def false_value
  # Delegates to `Database.false_value`.
  Database.false_value
end
2016-05-09 08:14:32 -04:00
# Updates the value of a column in batches.
#
# This method updates the table in batches of 5% of the total row count.
2019-02-28 06:15:30 -05:00
# A `batch_size` option can also be passed to set this to a fixed number.
2016-06-15 10:42:52 -04:00
# This method will continue updating rows until no rows remain.
#
2016-06-16 06:50:11 -04:00
# When given a block this method will yield two values to the block:
2016-06-15 10:42:52 -04:00
#
# 1. An instance of `Arel::Table` for the table that is being updated.
# 2. The query to run as an Arel object.
#
# By supplying a block one can add extra conditions to the queries being
# executed. Note that the same block is used for _all_ queries.
#
# Example:
#
# update_column_in_batches(:projects, :foo, 10) do |table, query|
# query.where(table[:some_column].eq('hello'))
# end
#
2016-06-16 06:50:11 -04:00
# This would result in this method updating only rows where
2016-06-15 10:42:52 -04:00
# `projects.some_column` equals "hello".
2016-05-09 08:14:32 -04:00
#
# table - The name of the table.
# column - The name of the column to update.
# value - The value for the column.
2016-06-16 06:50:11 -04:00
#
2017-11-17 11:02:10 -05:00
# The `value` argument is typically a literal. To perform a computed
# update, an Arel literal can be used instead:
#
# update_value = Arel.sql('bar * baz')
#
# update_column_in_batches(:projects, :foo, update_value) do |table, query|
# query.where(table[:some_column].eq('hello'))
# end
#
2016-06-16 06:50:11 -04:00
# Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
# determines this method to be too complex while there's no way to make it
# less "complex" without introducing extra methods (which actually will
# make things _more_ complex).
#
# rubocop: disable Metrics/AbcSize
2019-02-28 06:15:30 -05:00
def update_column_in_batches(table, column, value, batch_size: nil)
  if transaction_open?
    message = 'update_column_in_batches can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'

    raise message
  end

  table = Arel::Table.new(table)

  # Count the rows in scope; the caller's block (if any) is applied to
  # every query built here, including this one.
  count_query = table.project(Arel.star.count.as('count'))
  count_query = yield table, count_query if block_given?
  total = exec_query(count_query.to_sql).to_hash.first['count'].to_i

  return if total == 0

  if batch_size.nil?
    # Update in batches of 5% of the rows, with an upper limit of 1000 to
    # ensure we don't lock too many rows at once.
    batch_size = [((total / 100.0) * 5.0).ceil, 1000].min
  end

  # The lowest ID in scope is where the first batch starts.
  first_id_query = table.project(table[:id]).order(table[:id].asc).take(1)
  first_id_query = yield table, first_id_query if block_given?
  start_id = exec_query(first_id_query.to_sql).to_hash.first['id'].to_i

  loop do
    # Look up the ID located `batch_size` rows after `start_id`; when found
    # it is the exclusive upper bound of the current batch.
    boundary_query = table.project(table[:id])
      .where(table[:id].gteq(start_id))
      .order(table[:id].asc)
      .take(1)
      .skip(batch_size)
    boundary_query = yield table, boundary_query if block_given?
    boundary_row = exec_query(boundary_query.to_sql).to_hash.first

    update_query = Arel::UpdateManager.new
      .table(table)
      .set([[table[column], value]])
      .where(table[:id].gteq(start_id))

    if boundary_row
      boundary_id = boundary_row['id'].to_i
      # The next batch starts where this one stops.
      start_id = boundary_id
      update_query = update_query.where(table[:id].lt(boundary_id))
    end

    update_query = yield table, update_query if block_given?
    execute(update_query.to_sql)

    # There are no more rows left to update.
    break unless boundary_row
  end
end
# Adds a column with a default value without locking an entire table.
#
# This method runs the following steps:
#
# 1. Add the column with a default value of NULL.
2016-06-15 10:42:52 -04:00
# 2. Change the default value of the column to the specified value.
# 3. Update all existing rows in batches.
# 4. Set a `NOT NULL` constraint on the column if desired (the default).
2016-05-09 08:14:32 -04:00
#
# These steps ensure a column can be added to a large and commonly used
# table without locking the entire table for the duration of the table
# modification.
#
# table - The name of the table to update.
# column - The name of the column to add.
# type - The column type (e.g. `:integer`).
# default - The default value for the column.
2016-09-13 18:15:14 -04:00
# limit - Sets a column limit. For example, for :integer, the default is
# 4-bytes. Set `limit: 8` to allow 8-byte integers.
2016-05-09 08:14:32 -04:00
# allow_null - When set to `true` the column will allow NULL values, the
# default is to not allow NULL values.
2016-06-15 10:42:52 -04:00
#
# This method can also take a block which is passed directly to the
# `update_column_in_batches` method.
2016-09-13 18:15:14 -04:00
def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, &block)
  if transaction_open?
    message = 'add_column_with_default can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'

    raise message
  end

  disable_statement_timeout do
    transaction do
      # The column is first added with a NULL default; `limit` is only
      # passed along when one was given.
      column_options = { default: nil }
      column_options[:limit] = limit if limit
      add_column(table, column, type, **column_options)

      # Changing the default before the update ensures any newly inserted
      # rows already use the proper default value.
      change_column_default(table, column, default)
    end

    begin
      cast_default = connection.type_cast(default, column_for(table, column))
      update_column_in_batches(table, column, cast_default, &block)

      change_column_null(table, column, false) unless allow_null
    # We want to rescue _all_ exceptions here, even those that don't inherit
    # from StandardError.
    rescue Exception => error # rubocop: disable all
      # Drop the column again before re-raising the failure.
      remove_column(table, column)

      raise error
    end
  end
end
2017-04-12 12:15:19 -04:00
# Renames a column without requiring downtime.
#
# Concurrent renames work by using database triggers to ensure both the
# old and new column are in sync. However, this method will _not_ remove
# the triggers or the old column automatically; this needs to be done
# manually in a post-deployment migration. This can be done using the
# method `cleanup_concurrent_column_rename`.
#
# table - The name of the database table containing the column.
# old - The old column name.
# new - The new column name.
# type - The type of the new column. If no type is given the old column's
# type is used.
def rename_column_concurrently(table, old, new, type: nil)
  raise 'rename_column_concurrently can not be run inside a transaction' if transaction_open?

  check_trigger_permissions!(table)

  # Create the new column from the old one, then keep both in sync.
  create_column_from(table, old, new, type: type)
  install_rename_triggers(table, old, new)
end
2019-09-05 08:15:16 -04:00
# Reverses operations performed by rename_column_concurrently.
#
# This method takes care of removing previously installed triggers as well
# as removing the new column.
#
# table - The name of the database table.
# old - The name of the old column.
# new - The name of the new column.
2019-08-25 16:14:52 -04:00
def undo_rename_column_concurrently(table, old, new)
  trigger = rename_trigger_name(table, old, new)

  check_trigger_permissions!(table)

  # Drop the sync trigger first, then the column the rename had added.
  remove_rename_triggers_for_postgresql(table, trigger)
  remove_column(table, new)
end
2017-12-21 10:44:07 -05:00
# Installs triggers in a table that keep a new column in sync with an old
# one.
#
# table - The name of the table to install the trigger in.
# old_column - The name of the old column.
# new_column - The name of the new column.
def install_rename_triggers(table, old_column, new_column)
  # Table and column names are quoted before being handed to the
  # PostgreSQL-specific installer.
  install_rename_triggers_for_postgresql(
    rename_trigger_name(table, old_column, new_column),
    quote_table_name(table),
    quote_column_name(old_column),
    quote_column_name(new_column)
  )
end
# Changes the type of a column concurrently.
#
# table - The table containing the column.
# column - The name of the column to change.
# new_type - The new column type.
def change_column_type_concurrently(table, column, new_type)
  # A type change is done as a rename to a temporary column of the new type.
  rename_column_concurrently(table, column, "#{column}_for_type_change", type: new_type)
end
# Performs cleanup of a concurrent type change.
#
# table - The table containing the column.
# column - The name of the column to change.
def cleanup_concurrent_column_type_change(table, column)
  temporary = "#{column}_for_type_change"

  # This has to be performed in a transaction as otherwise we might have
  # inconsistent data.
  transaction do
    cleanup_concurrent_column_rename(table, column, temporary)
    # Give the temporary column the original column's name.
    rename_column(table, temporary, column)
  end
end
# Cleans up a concurrent column rename.
#
# This method takes care of removing previously installed triggers as well
# as removing the old column.
#
# table - The name of the database table.
# old - The name of the old column.
# new - The name of the new column.
def cleanup_concurrent_column_rename(table, old, new)
  trigger = rename_trigger_name(table, old, new)

  check_trigger_permissions!(table)

  # Drop the sync trigger first, then the old column.
  remove_rename_triggers_for_postgresql(table, trigger)
  remove_column(table, old)
end
2019-09-05 08:15:16 -04:00
# Reverses the operations performed by cleanup_concurrent_column_rename.
#
# This method adds back the old_column removed
# by cleanup_concurrent_column_rename.
# It also adds back the (old_column > new_column) trigger that is removed
# by cleanup_concurrent_column_rename.
#
# table - The name of the database table containing the column.
# old - The old column name.
# new - The new column name.
# type - The type of the old column. If no type is given the new column's
# type is used.
2019-08-25 16:14:52 -04:00
def undo_cleanup_concurrent_column_rename(table, old, new, type: nil)
  raise 'undo_cleanup_concurrent_column_rename can not be run inside a transaction' if transaction_open?

  check_trigger_permissions!(table)

  # Recreate the old column from the new one, then reinstall the
  # old -> new trigger.
  create_column_from(table, new, old, type: type)
  install_rename_triggers(table, old, new)
end
2017-12-21 10:44:07 -05:00
# Changes the column type of a table using a background migration.
#
# Because this method uses a background migration it's more suitable for
# large tables. For small tables it's better to use
# `change_column_type_concurrently` since it can complete its work in a
# much shorter amount of time and doesn't rely on Sidekiq.
#
# Example usage:
#
# class Issue < ActiveRecord::Base
# self.table_name = 'issues'
#
# include EachBatch
#
# def self.to_migrate
# where('closed_at IS NOT NULL')
# end
# end
#
# change_column_type_using_background_migration(
# Issue.to_migrate,
# :closed_at,
# :datetime_with_timezone
# )
#
# Reverting a migration like this is done exactly the same way, just with
# a different type to migrate to (e.g. `:datetime` in the above example).
#
# relation - An ActiveRecord relation to use for scheduling jobs and
# figuring out what table we're modifying. This relation _must_
# have the EachBatch module included.
#
# column - The name of the column for which the type will be changed.
#
# new_type - The new type of the column.
#
# batch_size - The number of rows to schedule in a single background
# migration.
#
# interval - The time interval between every background migration.
def change_column_type_using_background_migration(
  relation,
  column,
  new_type,
  batch_size: 10_000,
  interval: 10.minutes
)
  raise TypeError, 'The relation must include the EachBatch module' unless relation.model < EachBatch

  temp_column = "#{column}_for_type_change"
  table = relation.table_name
  last_index = 0

  add_column(table, temp_column, new_type)
  install_rename_triggers(table, column, temp_column)

  # Schedule the jobs that will copy the data from the old column to the
  # new one. Rows with NULL values in our source column are skipped since
  # the target column is already NULL at this point.
  relation.where.not(column => nil).each_batch(of: batch_size) do |batch, index|
    first_id, last_id = batch.pluck('MIN(id), MAX(id)').first
    last_index = index

    BackgroundMigrationWorker.perform_in(
      index * interval,
      'CopyColumn',
      [table, column, temp_column, first_id, last_id]
    )
  end

  # Schedule the renaming of the column to happen (initially) 1 hour after
  # the last batch finished.
  BackgroundMigrationWorker.perform_in(
    (last_index * interval) + 1.hour,
    'CleanupConcurrentTypeChange',
    [table, column, temp_column]
  )

  return unless perform_background_migration_inline?

  # To ensure the schema is up to date immediately we perform the
  # migration inline in dev / test environments.
  Gitlab::BackgroundMigration.steal('CopyColumn')
  Gitlab::BackgroundMigration.steal('CleanupConcurrentTypeChange')
end
2018-06-22 09:26:01 -04:00
# Renames a column using a background migration.
#
# Because this method uses a background migration it's more suitable for
# large tables. For small tables it's better to use
# `rename_column_concurrently` since it can complete its work in a much
# shorter amount of time and doesn't rely on Sidekiq.
#
# Example usage:
#
# rename_column_using_background_migration(
# :users,
# :feed_token,
# :rss_token
# )
#
# table - The name of the database table containing the column.
#
# old - The old column name.
#
# new - The new column name.
#
# type - The type of the new column. If no type is given the old column's
# type is used.
#
# batch_size - The number of rows to schedule in a single background
# migration.
#
# interval - The time interval between every background migration.
def rename_column_using_background_migration(
  table,
  old_column,
  new_column,
  type: nil,
  batch_size: 10_000,
  interval: 10.minutes
)
  check_trigger_permissions!(table)

  source_column = column_for(table, old_column)
  # Without an explicit type the old column's type is reused.
  new_type = type || source_column.type
  last_index = 0

  add_column(
    table,
    new_column,
    new_type,
    limit: source_column.limit,
    precision: source_column.precision,
    scale: source_column.scale
  )

  # We set the default value _after_ adding the column so we don't end up
  # updating any existing data with the default value. This isn't
  # necessary since we copy over old values further down.
  change_column_default(table, new_column, source_column.default) if source_column.default

  install_rename_triggers(table, old_column, new_column)

  # Anonymous model bound to the table so rows can be walked with EachBatch.
  model = Class.new(ActiveRecord::Base) do
    self.table_name = table

    include ::EachBatch
  end

  # Schedule the jobs that will copy the data from the old column to the
  # new one. Rows with NULL values in our source column are skipped since
  # the target column is already NULL at this point.
  model.where.not(old_column => nil).each_batch(of: batch_size) do |batch, index|
    first_id, last_id = batch.pluck('MIN(id), MAX(id)').first
    last_index = index

    BackgroundMigrationWorker.perform_in(
      index * interval,
      'CopyColumn',
      [table, old_column, new_column, first_id, last_id]
    )
  end

  # Schedule the renaming of the column to happen (initially) 1 hour after
  # the last batch finished.
  BackgroundMigrationWorker.perform_in(
    (last_index * interval) + 1.hour,
    'CleanupConcurrentRename',
    [table, old_column, new_column]
  )

  return unless perform_background_migration_inline?

  # To ensure the schema is up to date immediately we perform the
  # migration inline in dev / test environments.
  Gitlab::BackgroundMigration.steal('CopyColumn')
  Gitlab::BackgroundMigration.steal('CleanupConcurrentRename')
end
2017-12-21 10:44:07 -05:00
def perform_background_migration_inline?
  # True only in the test and development environments.
  return true if Rails.env.test?

  Rails.env.development?
end
2017-04-12 12:15:19 -04:00
# Performs a concurrent column rename when using PostgreSQL.
def install_rename_triggers_for_postgresql(trigger, table, old, new)
  # Trigger function that copies the old column's value into the new column.
  create_function = <<~SQL
    CREATE OR REPLACE FUNCTION #{trigger}()
    RETURNS trigger AS
    $BODY$
    BEGIN
      NEW.#{new} := NEW.#{old};
      RETURN NEW;
    END;
    $BODY$
    LANGUAGE 'plpgsql'
    VOLATILE
  SQL

  # Drop any trigger of the same name first so the CREATE TRIGGER below
  # can be re-run.
  drop_trigger = <<~SQL
    DROP TRIGGER IF EXISTS #{trigger}
    ON #{table}
  SQL

  create_trigger = <<~SQL
    CREATE TRIGGER #{trigger}
    BEFORE INSERT OR UPDATE
    ON #{table}
    FOR EACH ROW
    EXECUTE PROCEDURE #{trigger}()
  SQL

  execute(create_function)
  execute(drop_trigger)
  execute(create_trigger)
end
# Removes the triggers used for renaming a PostgreSQL column concurrently.
def remove_rename_triggers_for_postgresql(table, trigger)
  # The trigger and its function share one name; drop the trigger first.
  drop_trigger = "DROP TRIGGER IF EXISTS #{trigger} ON #{table}"
  drop_function = "DROP FUNCTION IF EXISTS #{trigger}()"

  execute(drop_trigger)
  execute(drop_function)
end
# Returns the (base) name to use for triggers when renaming columns.
def rename_trigger_name(table, old, new)
  digest = Digest::SHA256.hexdigest("#{table}_#{old}_#{new}")

  # Keep only the first 12 hex characters of the digest.
  "trigger_#{digest[0, 12]}"
end
# Returns an Array containing the indexes for the given column
def indexes_for(table, column)
  # Index columns are compared as strings.
  wanted = column.to_s

  indexes(table).select { |candidate| candidate.columns.include?(wanted) }
end
# Returns an Array containing the foreign keys for the given column.
def foreign_keys_for(table, column)
  # Foreign key columns are compared as strings.
  wanted = column.to_s

  foreign_keys(table).select { |foreign_key| foreign_key.column == wanted }
end
# Copies all indexes for the old column to a new column.
#
# table - The table containing the columns and indexes.
# old - The old column.
# new - The new column.
def copy_indexes(table, old, new)
  old_name = old.to_s
  new_name = new.to_s

  indexes_for(table, old_name).each do |index|
    # This is necessary as we can't properly rename indexes such as
    # "ci_taggings_idx".
    unless index.name.include?(old_name)
      raise "The index #{index.name} can not be copied as it does not " \
        "mention the old column. You have to rename this index manually first."
    end

    # Same column list, with the old column swapped for the new one.
    new_columns = index.columns.map { |column| column == old_name ? new_name : column }

    options = {
      unique: index.unique,
      name: index.name.gsub(old_name, new_name),
      length: index.lengths,
      order: index.orders
    }

    options[:using] = index.using if index.using
    options[:where] = index.where if index.where

    unless index.opclasses.blank?
      opclasses = index.opclasses.dup

      # Copy the operator classes for the old column (if any) to the new
      # column.
      opclasses[new_name] = opclasses.delete(old_name) if opclasses[old_name]

      options[:opclasses] = opclasses
    end

    add_concurrent_index(table, new_columns, options)
  end
end
# Re-creates every foreign key defined on the old column for the new column.
#
# table - The table containing the columns and foreign keys.
# old - The old column.
# new - The new column.
def copy_foreign_keys(table, old, new)
  foreign_keys_for(table, old).each do |foreign_key|
    # Only the `on_delete` behaviour is carried over from the existing key.
    key_options = { column: new, on_delete: foreign_key.on_delete }

    add_concurrent_foreign_key(foreign_key.from_table, foreign_key.to_table, **key_options)
  end
end
# Looks up a single column definition by name.
#
# table - The table to inspect.
# name - The column name (String or Symbol).
#
# Returns the matching column, or nil when the table has no such column.
def column_for(table, name)
  wanted = name.to_s

  columns(table).find do |candidate|
    candidate.name == wanted
  end
end
2017-04-07 18:07:57 -04:00
2018-10-30 06:53:01 -04:00
# Builds a SQL fragment that replaces the first occurrence of a string in a
# column with the replacement using `regexp_replace`.
#
# column - The Arel node for the column to operate on.
# pattern - The pattern to look for (converted with `to_s` and quoted).
# replacement - The replacement text (converted with `to_s` and quoted).
#
# Returns an Arel::Nodes::SqlLiteral wrapping the generated function call.
def replace_sql(column, pattern, replacement)
  arguments = [
    column,
    Arel::Nodes::Quoted.new(pattern.to_s),
    Arel::Nodes::Quoted.new(replacement.to_s)
  ]
  function_call = Arel::Nodes::NamedFunction.new(" regexp_replace ", arguments)

  Arel::Nodes::SqlLiteral.new(function_call.to_sql)
end
2017-08-20 06:28:25 -04:00
2019-05-07 03:38:43 -04:00
# Removes a foreign key only when `foreign_key_exists?` reports it for the
# same arguments.
#
# args - Forwarded unchanged to both `foreign_key_exists?` and
#        `remove_foreign_key`.
#
# Returns the result of `remove_foreign_key`, or nil when nothing was removed.
def remove_foreign_key_if_exists(*args)
  remove_foreign_key(*args) if foreign_key_exists?(*args)
end
2017-08-20 06:28:25 -04:00
# Removes a foreign key, ignoring any ArgumentError raised while doing so.
#
# args - Forwarded unchanged to `remove_foreign_key`.
#
# Returns the result of `remove_foreign_key`, or nil when an ArgumentError was
# swallowed. Any other exception propagates to the caller.
def remove_foreign_key_without_error(*args)
  remove_foreign_key(*args)
rescue ArgumentError
  # Best effort by design: presumably the key was already gone.
  nil
end
2017-08-22 06:47:20 -04:00
2017-08-23 02:58:55 -04:00
# Moves every job from one Sidekiq queue to another.
#
# queue_from - The name of the queue to drain.
# to - The name of the queue receiving the jobs.
#
# Jobs are moved one at a time with RPOPLPUSH until the source list is empty.
# Always returns nil.
def sidekiq_queue_migrate(queue_from, to:)
  # Moving a queue onto itself only rotates the list: its length never drops
  # to zero, so the loop below would never terminate.
  return if queue_from.to_s == to.to_s

  # Sidekiq keeps each queue in a Redis list named "queue:<name>"; the key must
  # not contain any padding or it addresses a different (empty) list.
  source_key = "queue:#{queue_from}"
  target_key = "queue:#{to}"

  Sidekiq.redis do |conn|
    conn.rpoplpush(source_key, target_key) while conn.llen(source_key) > 0
  end
end
# Returns the number of jobs currently stored in a Sidekiq queue.
#
# queue_name - The name of the queue, without the "queue:" prefix.
#
# The length is read with LLEN from the Redis list "queue:<queue_name>". The
# key must match Sidekiq's naming exactly (no padding), otherwise a
# nonexistent list is inspected and the result is always 0.
def sidekiq_queue_length(queue_name)
  Sidekiq.redis { |conn| conn.llen("queue:#{queue_name}") }
end
2017-08-18 08:26:23 -04:00
# Ensures the current database user may create and execute triggers on a table.
#
# table - The name of the table the triggers would be created on.
#
# Returns nil when `Grant.create_and_execute_trigger?` allows it. Otherwise
# raises a RuntimeError explaining how to grant the missing permissions.
def check_trigger_permissions!(table)
  return if Grant.create_and_execute_trigger?(table)

  dbname = Database.database_name
  user = Database.username

  # The suggested statement has to be valid PostgreSQL so that it can be copied
  # verbatim: `ALTER USER <name> WITH SUPERUSER`.
  raise <<~EOF
    Your database user is not allowed to create, drop, or execute triggers on the
    table #{table}.
    If you are using PostgreSQL you can solve this by logging in to the GitLab
    database (#{dbname}) using a super user and running:
    ALTER USER #{user} WITH SUPERUSER
    This query will grant the user super user permissions, ensuring you don't run
    into similar problems in the future (e.g. when new tables are created).
  EOF
end
2017-09-08 16:10:53 -04:00
2017-09-11 15:20:04 -04:00
# Bulk queues background migration jobs for an entire table, batched by ID range.
# "Bulk" meaning many jobs will be pushed at a time for efficiency.
# If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.
#
# model_class - The table being iterated over
# job_class_name - The background migration job class as a string
# batch_size - The maximum number of rows per job
#
# Raises a RuntimeError when `model_class` has no `id` column.
#
# Example:
#
#     class Route < ActiveRecord::Base
#       include EachBatch
#       self.table_name = 'routes'
#     end
#
#     bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
#
# Where the model_class includes EachBatch, and the background migration exists:
#
#     class Gitlab::BackgroundMigration::ProcessRoutes
#       def perform(start_id, end_id)
#         # do something
#       end
#     end
def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
  raise " #{model_class} does not have an ID to use for batch ranges " unless model_class.column_names.include?('id')

  jobs = []

  # Build the aggregates with Arel rather than a raw SQL string: the column is
  # qualified with the table name, and `pluck` accepts Arel nodes without the
  # raw SQL deprecation that plain strings trigger.
  id_column = model_class.arel_table[:id]

  model_class.each_batch(of: batch_size) do |relation|
    start_id, end_id = relation.pluck(id_column.minimum, id_column.maximum).first

    if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
      # Note: This code path generally only helps with many millions of rows
      # We push multiple jobs at a time to reduce the time spent in
      # Sidekiq/Redis operations. We're using this buffer based approach so we
      # don't need to run additional queries for every range.
      BackgroundMigrationWorker.bulk_perform_async(jobs)
      jobs.clear
    end

    jobs << [job_class_name, [start_id, end_id]]
  end

  # Flush whatever is left in the buffer after the last batch.
  BackgroundMigrationWorker.bulk_perform_async(jobs) unless jobs.empty?
end
2017-09-11 15:20:04 -04:00
# Queues background migration jobs for an entire table, batched by ID range.
# Each job is scheduled with a `delay_interval` in between.
# If you use a small interval, then some jobs may run at the same time.
#
# model_class - The table or relation being iterated over
# job_class_name - The background migration job class as a string
# delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
# batch_size - The maximum number of rows per job
#
# Raises a RuntimeError when `model_class` has no `id` column.
#
# Example:
#
#     class Route < ActiveRecord::Base
#       include EachBatch
#       self.table_name = 'routes'
#     end
#
#     queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)
#
# Where the model_class includes EachBatch, and the background migration exists:
#
#     class Gitlab::BackgroundMigration::ProcessRoutes
#       def perform(start_id, end_id)
#         # do something
#       end
#     end
def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
  raise " #{model_class} does not have an ID to use for batch ranges " unless model_class.column_names.include?('id')

  # To not overload the worker too much we enforce a minimum interval both
  # when scheduling and performing jobs.
  minimum_interval = BackgroundMigrationWorker.minimum_interval
  delay_interval = minimum_interval if delay_interval < minimum_interval

  # Qualify the ID column with its table: `model_class` may be a relation, and
  # a bare `id` becomes ambiguous as soon as that relation joins another table.
  id_column = model_class.arel_table[:id]

  model_class.each_batch(of: batch_size) do |relation, index|
    start_id, end_id = relation.pluck(id_column.minimum, id_column.maximum).first

    # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
    # the same time, which is not helpful in most cases where we wish to
    # spread the work over time.
    BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id])
  end
end
2018-03-13 13:10:53 -04:00
2018-03-07 08:56:25 -05:00
# Checks whether an index with the given name exists on a table (postgres).
#
# This also finds indexes using an expression on the column, for example:
# `CREATE INDEX CONCURRENTLY index_name ON table (LOWER(column));`
#
# We can remove this when upgrading to Rails 5 with an updated `index_exists?`:
# - https://github.com/rails/rails/commit/edc2b7718725016e988089b5fb6d6fb9d6e16882
#
# Or this can be removed when we no longer support postgres < 9.5, so we
# can use `CREATE INDEX IF NOT EXISTS`.
#
# table - The table the index belongs to.
# index - The index name (String or Symbol).
#
# Returns true when `indexes(table)` lists the name, otherwise the result of
# `postgres_exists_by_name?`.
def index_exists_by_name?(table, index)
  index_name = index.to_s

  # We can't fall back to the normal `index_exists?` method because that
  # does not find indexes without passing a column name.
  return true if indexes(table).any? { |existing| existing.name == index_name }

  postgres_exists_by_name?(table, index)
end
# Checks the PostgreSQL catalog for an index with the given name on a table.
#
# table - The name of the table the index belongs to.
# name - The name of the index.
#
# Counts the `pg_index` rows whose index relation is called `name` and whose
# table relation is called `table`.
#
# Returns true when at least one such row exists, false otherwise.
def postgres_exists_by_name?(table, name)
  # Let the connection quote both values instead of wrapping them in literal
  # quotes by hand, so names containing an apostrophe cannot break the query.
  quoted_name = connection.quote(name.to_s)
  quoted_table = connection.quote(table.to_s)

  index_sql = <<~SQL
    SELECT COUNT ( * )
    FROM pg_index
    JOIN pg_class i ON ( indexrelid = i . oid )
    JOIN pg_class t ON ( indrelid = t . oid )
    WHERE i . relname = #{quoted_name} AND t . relname = #{quoted_table}
  SQL

  connection.select_value(index_sql).to_i > 0
end
2018-09-21 06:23:29 -04:00
2019-07-18 11:47:01 -04:00
private
2019-09-05 08:15:16 -04:00
# Creates a new column modelled on an existing one and copies its data,
# indexes and foreign keys.
#
# table - The table containing the columns.
# old - The name of the existing column.
# new - The name of the column to create.
# type - The type of the new column; defaults to the type of the old column.
def create_column_from(table, old, new, type: nil)
  source_column = column_for(table, old)

  add_column(
    table, new, type || source_column.type,
    limit: source_column.limit,
    precision: source_column.precision,
    scale: source_column.scale
  )

  # We set the default value _after_ adding the column so we don't end up
  # updating any existing data with the default value. This isn't
  # necessary since we copy over old values further down.
  default = source_column.default
  change_column_default(table, new, default) unless default.nil?

  update_column_in_batches(table, new, Arel::Table.new(table)[old])

  # The NOT NULL constraint is only applied once the values have been copied.
  change_column_null(table, new, false) unless source_column.null

  copy_indexes(table, old, new)
  copy_foreign_keys(table, old, new)
end
2019-07-18 11:47:01 -04:00
# Ensures a timestamp column name is one of PERMITTED_TIMESTAMP_COLUMNS.
#
# column_name - The column name to check.
#
# Returns nil for a permitted name, raises a RuntimeError otherwise.
def validate_timestamp_column_name!(column_name)
  unless PERMITTED_TIMESTAMP_COLUMNS.include?(column_name)
    permitted = PERMITTED_TIMESTAMP_COLUMNS.to_a

    raise <<~MESSAGE
      Illegal timestamp column name ! Got #{column_name}.
      Must be one of : #{permitted}
    MESSAGE
  end
end
# Ensures a helper is not being called while a transaction is open.
#
# method_name - The name of the helper, used in the error message.
# modifier - Optional text appended after the method name in the message.
#
# Returns nil when no transaction is open, raises a RuntimeError otherwise.
def validate_not_in_transaction!(method_name, modifier = nil)
  if transaction_open?
    subject = ["`#{method_name}`", modifier].compact.join(' ')

    raise <<~ERROR
      #{subject} cannot be run inside a transaction.
      You can disable transactions by calling ` disable_ddl_transaction! ` in the body of
      your migration class
    ERROR
  end
end
2016-05-09 08:14:32 -04:00
end
end
end