Migrate sensitive web hook data in the background

This commit is contained in:
Nick Thomas 2018-09-14 18:21:28 +01:00
parent fb48eaba46
commit 466371a06c
No known key found for this signature in database
GPG key ID: 2A313A47AFADACE9
5 changed files with 216 additions and 0 deletions

View file

@ -0,0 +1,33 @@
# frozen_string_literal: true
# Schedules background jobs that encrypt the `token` and `url` columns of the
# `web_hooks` table. The encryption itself is performed by the `EncryptColumns`
# background migration; this migration only enqueues the work.
class EncryptWebHooksColumns < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # Rows per scheduling batch; every job created from one batch shares a delay.
  BATCH_SIZE = 10_000
  # Rows covered by a single background job.
  RANGE_SIZE = 100
  MIGRATION = 'EncryptColumns'
  COLUMNS = [:token, :url].freeze

  WebHook = ::Gitlab::BackgroundMigration::Models::EncryptColumns::WebHook

  disable_ddl_transaction!

  # Walks `web_hooks` in batches of BATCH_SIZE rows, splits each batch into
  # ranges of RANGE_SIZE rows and enqueues one job per range. The delay grows
  # by two minutes with each outer batch index.
  def up
    WebHook.each_batch(of: BATCH_SIZE) do |batch, index|
      delay = index * 2.minutes

      batch.each_batch(of: RANGE_SIZE) do |sub_batch|
        # A single [min_id, max_id] pair describing the rows of this range
        range = sub_batch.pluck('MIN(id)', 'MAX(id)').first

        # Job arguments are serialized, so pass the model by name rather than
        # as a class object; the background migration constantizes strings.
        args = [WebHook.name, COLUMNS, *range]

        BackgroundMigrationWorker.perform_in(delay, MIGRATION, args)
      end
    end
  end

  # Nothing to revert here: `up` only enqueues jobs.
  def down
    # noop
  end
end

View file

@ -0,0 +1,80 @@
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # EncryptColumns moves data out of an unencrypted column (say `foo`) and
    # into its encrypted counterpart (say `encrypted_foo`).
    #
    # Requirements for the migrated table and model:
    #
    # * the table _must_ use an `id` column as its primary key;
    # * the encrypted column should be managed by attr_encrypted and map to an
    #   attribute named like the unencrypted column (i.e. the unencrypted
    #   column is shadowed).
    #
    # To stay independent of whatever the model in app/ looks like at a given
    # version, define a dedicated model under
    # `lib/gitlab/background_migration/models/encrypt_columns` and use it from
    # the migration that enqueues the jobs as well, so the code is shared.
    class EncryptColumns
      # Encrypts the given attributes for every row with `from <= id <= to`.
      #
      # model      - the model class, or its name as a String
      # attributes - one or more plaintext attribute names (Symbol or String)
      # from, to   - inclusive bounds of the `id` range to process
      #
      # Raises a RuntimeError when no encrypted column can be determined for
      # one of the attributes.
      def perform(model, attributes, from, to)
        model = model.constantize if model.is_a?(String)
        column_map = expand_attributes(model, Array(attributes).map(&:to_sym))

        model.transaction do
          # Lock the rows with SELECT ... FOR UPDATE so the values cannot be
          # changed while they are being encrypted
          model.where(id: from..to).lock.each do |record|
            encrypt!(record, column_map)
          end
        end
      end

      private

      # Returns a hash of { attribute => encrypted column name }, looked up in
      # the model's `encrypted_attributes` configuration.
      def expand_attributes(klass, attributes)
        attributes.each_with_object({}) do |attribute, mapping|
          crypt_column_name = klass.encrypted_attributes[attribute]&.fetch(:attribute)

          if crypt_column_name.nil?
            raise "Couldn't determine encrypted column for #{klass}##{attribute}"
          end

          mapping[attribute] = crypt_column_name
        end
      end

      # Computes the ciphertext for each column, persists it, and then blanks
      # the plaintext columns that were migrated.
      def encrypt!(instance, attributes)
        to_clear = attributes.each_with_object({}) do |(plain, crypt), cleared|
          migrated = apply_attribute!(instance, plain, crypt)
          cleared[migrated] = nil if migrated
        end

        return unless instance.changed?

        instance.save!
        instance.update_columns(to_clear)
      end

      # Assigns the plaintext through the attribute writer when it still needs
      # encrypting. Returns the plaintext column name if a value was assigned,
      # nil otherwise.
      def apply_attribute!(instance, plain_column, crypt_column)
        plaintext = instance[plain_column]
        ciphertext = instance[crypt_column]

        # Nothing to migrate without a plaintext value
        return unless plaintext.present?
        # Leave rows alone when an encrypted value already exists
        return if ciphertext.present?

        # attr_encrypted will calculate and set the expected value for us
        instance.public_send("#{plain_column}=", plaintext) # rubocop:disable GitlabSecurity/PublicSend

        plain_column
      end
    end
  end
end

View file

@ -0,0 +1,28 @@
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    module Models
      module EncryptColumns
        # Model backed by the `web_hooks` table. It is shared between the
        # synchronous migration and the background migration that encrypt
        # the `token` and `url` columns.
        class WebHook < ActiveRecord::Base
          include ::EachBatch

          self.table_name = 'web_hooks'
          # NOTE(review): presumably keeps a `type` column from triggering
          # single-table inheritance - confirm
          self.inheritance_column = :_type_disabled

          # Both columns are declared with identical encryption settings
          %i[token url].each do |column|
            attr_encrypted column,
                           mode: :per_attribute_iv,
                           algorithm: 'aes-256-gcm',
                           key: Settings.attr_encrypted_db_key_base_truncated
          end
        end
      end
    end
  end
end

View file

@ -0,0 +1,69 @@
require 'spec_helper'
describe Gitlab::BackgroundMigration::EncryptColumns, :migration, schema: 20180910115836 do
  let(:model) { Gitlab::BackgroundMigration::Models::EncryptColumns::WebHook }
  let(:web_hooks) { table(:web_hooks) }

  # Attributes of a row that has not been migrated yet
  let(:plaintext_attrs) do
    {
      'encrypted_token' => nil,
      'encrypted_url' => nil,
      'token' => 'secret',
      'url' => 'http://example.com?access_token=secret'
    }
  end

  # Expectations for a migrated row: ciphertext present, plaintext cleared
  let(:encrypted_attrs) do
    {
      'encrypted_token' => be_present,
      'encrypted_url' => be_present,
      'token' => nil,
      'url' => nil
    }
  end

  describe '#perform' do
    it 'encrypts columns for the specified range' do
      created = web_hooks.create(Array.new(5) { plaintext_attrs }).sort_by(&:id)

      # Only the three middle rows fall inside the migrated id range
      subject.perform(model, %i[token url], created[1].id, created[3].id)

      reloaded = web_hooks.where(id: created.map(&:id)).order(:id).to_a

      aggregate_failures do
        expect(reloaded[0]).to have_attributes(plaintext_attrs)

        reloaded[1..3].each do |hook|
          expect(hook).to have_attributes(encrypted_attrs)
        end

        expect(reloaded[4]).to have_attributes(plaintext_attrs)
      end
    end

    it 'acquires an exclusive lock for the update' do
      relation = double('relation', each: nil)

      expect(model).to receive(:where).and_return(relation)
      expect(relation).to receive(:lock).and_return(relation)

      subject.perform(model, %i[token url], 1, 1)
    end

    it 'skips already-encrypted columns' do
      values = {
        'encrypted_token' => 'known encrypted token',
        'encrypted_url' => 'known encrypted url',
        'token' => 'token',
        'url' => 'url'
      }
      hook = web_hooks.create(values)

      subject.perform(model, %i[token url], hook.id, hook.id)

      # Neither the ciphertext nor the plaintext may have been touched
      expect(hook.reload).to have_attributes(values)
    end
  end
end

View file

@ -57,6 +57,12 @@ describe WebHook do
end
end
describe 'encrypted attributes' do
subject { described_class.encrypted_attributes.keys }
it { is_expected.to contain_exactly(:token, :url) }
end
describe 'execute' do
let(:data) { { key: 'value' } }
let(:hook_name) { 'project hook' }