diff --git a/changelogs/unreleased/53763-fix-encrypt-columns-data-loss.yml b/changelogs/unreleased/53763-fix-encrypt-columns-data-loss.yml new file mode 100644 index 00000000000..44362a8622e --- /dev/null +++ b/changelogs/unreleased/53763-fix-encrypt-columns-data-loss.yml @@ -0,0 +1,5 @@ +--- +title: Correctly handle data-loss scenarios when encrypting columns +merge_request: 23306 +author: +type: fixed diff --git a/config/initializers/attr_encrypted_no_db_connection.rb b/config/initializers/attr_encrypted_no_db_connection.rb index e007666b852..7ad458929db 100644 --- a/config/initializers/attr_encrypted_no_db_connection.rb +++ b/config/initializers/attr_encrypted_no_db_connection.rb @@ -1,7 +1,18 @@ module AttrEncrypted module Adapters module ActiveRecord - module DBConnectionQuerier + module GitlabMonkeyPatches + # Prevent attr_encrypted from defining virtual accessors for encryption + # data when the code and schema are out of sync. See this issue for more + # details: https://github.com/attr-encrypted/attr_encrypted/issues/332 + def attribute_instance_methods_as_symbols_available? + false + end + + # Prevent attr_encrypted from checking out a database connection + # indefinitely. The result of this method is only used when the former + # is true, but it is called unconditionally, so there is still value to + # ensuring the connection is released def attribute_instance_methods_as_symbols # Use with_connection so the connection doesn't stay pinned to the thread. connected = ::ActiveRecord::Base.connection_pool.with_connection(&:active?) rescue false @@ -15,7 +26,16 @@ module AttrEncrypted end end end - prepend DBConnectionQuerier end end end + +# As of v3.1.0, the attr_encrypted gem defines the AttrEncrypted and +# AttrEncrypted::Adapters::ActiveRecord modules, and uses "extend" to mix them +# into the ActiveRecord::Base class. This intervention overrides utility methods +# defined by attr_encrypted to fix two bugs, as detailed above. +# +# The methods are used here: https://github.com/attr-encrypted/attr_encrypted/blob/3.1.0/lib/attr_encrypted.rb#L145-158 +ActiveSupport.on_load(:active_record) do + extend AttrEncrypted::Adapters::ActiveRecord::GitlabMonkeyPatches +end diff --git a/lib/gitlab/background_migration/encrypt_columns.rb b/lib/gitlab/background_migration/encrypt_columns.rb index 0d333e47e7b..bd5f12276ab 100644 --- a/lib/gitlab/background_migration/encrypt_columns.rb +++ b/lib/gitlab/background_migration/encrypt_columns.rb @@ -17,6 +17,12 @@ module Gitlab class EncryptColumns def perform(model, attributes, from, to) model = model.constantize if model.is_a?(String) + + # If sidekiq hasn't undergone a restart, its idea of what columns are + # present may be inaccurate, so ensure this is as fresh as possible + model.reset_column_information + model.define_attribute_methods + attributes = expand_attributes(model, Array(attributes).map(&:to_sym)) model.transaction do @@ -41,6 +47,14 @@ module Gitlab raise "Couldn't determine encrypted column for #{klass}##{attribute}" if crypt_column_name.nil? + raise "#{klass} source column: #{attribute} is missing" unless + klass.column_names.include?(attribute.to_s) + + # Running the migration without the destination column being present + # leads to data loss + raise "#{klass} destination column: #{crypt_column_name} is missing" unless + klass.column_names.include?(crypt_column_name.to_s) + [attribute, crypt_column_name] end diff --git a/spec/initializers/attr_encrypted_no_db_connection_spec.rb b/spec/initializers/attr_encrypted_no_db_connection_spec.rb new file mode 100644 index 00000000000..2da9f1cbd96 --- /dev/null +++ b/spec/initializers/attr_encrypted_no_db_connection_spec.rb @@ -0,0 +1,30 @@ +require 'spec_helper' + +describe 'GitLab monkey-patches to AttrEncrypted' do + describe '#attribute_instance_methods_as_symbols_available?' do + it 'returns false' do + expect(ActiveRecord::Base.__send__(:attribute_instance_methods_as_symbols_available?)).to be_falsy + end + + it 'does not define virtual attributes' do + klass = Class.new(ActiveRecord::Base) do + # We need some sort of table to work on + self.table_name = 'projects' + + attr_encrypted :foo + end + + instance = klass.new + + aggregate_failures do + %w[ + encrypted_foo encrypted_foo= + encrypted_foo_iv encrypted_foo_iv= + encrypted_foo_salt encrypted_foo_salt= + ].each do |method_name| + expect(instance).not_to respond_to(method_name) + end + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/encrypt_columns_spec.rb b/spec/lib/gitlab/background_migration/encrypt_columns_spec.rb index 2a869446753..1d9bac79dcd 100644 --- a/spec/lib/gitlab/background_migration/encrypt_columns_spec.rb +++ b/spec/lib/gitlab/background_migration/encrypt_columns_spec.rb @@ -65,5 +65,30 @@ describe Gitlab::BackgroundMigration::EncryptColumns, :migration, schema: 201809 expect(hook).to have_attributes(values) end + + it 'reloads the model column information' do + expect(model).to receive(:reset_column_information).and_call_original + expect(model).to receive(:define_attribute_methods).and_call_original + + subject.perform(model, [:token, :url], 1, 1) + end + + it 'fails if a source column is not present' do + columns = model.columns.reject { |c| c.name == 'url' } + allow(model).to receive(:columns) { columns } + + expect do + subject.perform(model, [:token, :url], 1, 1) + end.to raise_error(/source column: url is missing/) + end + + it 'fails if a destination column is not present' do + columns = model.columns.reject { |c| c.name == 'encrypted_url' } + allow(model).to receive(:columns) { columns } + + expect do + subject.perform(model, [:token, :url], 1, 1) + end.to raise_error(/destination column: encrypted_url is missing/) + end end end