Add a unique and not null constraint on the project_features.project_id column

This commit has two migrations:

1. The first prunes duplicate rows in the project_features table and leaves
   the row with the highest ID.  Since the behavior was indeterministic before
   and depended on which row the database decided to use, this change at least
   makes the permissions consistent. For example, in some cases, the Wiki may
   have been disabled but enabled in another entry.

2. The second adds a non-null constraint on the project_features.project_id
   column.

Closes #37882

Fixes a significant part of gitlab-com/migration#408.

We found that we were overcounting Wikis because of these duplicates.
On GitLab.com, there are 56 rows with duplicate entries by project_id, and 16,661 rows with NULL project_id values.
This commit is contained in:
Stan Hu 2018-05-11 11:19:49 -07:00 committed by Yorick Peterse
parent f4ef6b474c
commit 3126e89eb8
No known key found for this signature in database
GPG key ID: EDD30D2BEB691AC9
7 changed files with 135 additions and 7 deletions

View file

@ -0,0 +1,5 @@
---
title: Add a unique and not null constraint on the project_features.project_id column
merge_request:
author:
type: fixed

View file

@ -0,0 +1,43 @@
class AddUniqueConstraintToProjectFeaturesProjectId < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
class ProjectFeature < ActiveRecord::Base
self.table_name = 'project_features'
include EachBatch
end
def up
remove_duplicates
add_concurrent_index :project_features, :project_id, unique: true, name: 'index_project_features_on_project_id_unique'
remove_concurrent_index_by_name :project_features, 'index_project_features_on_project_id'
rename_index :project_features, 'index_project_features_on_project_id_unique', 'index_project_features_on_project_id'
end
def down
rename_index :project_features, 'index_project_features_on_project_id', 'index_project_features_on_project_id_old'
add_concurrent_index :project_features, :project_id
remove_concurrent_index_by_name :project_features, 'index_project_features_on_project_id_old'
end
private
def remove_duplicates
features = ProjectFeature
.select('MAX(id) AS max, COUNT(id), project_id')
.group(:project_id)
.having('COUNT(id) > 1')
features.each do |feature|
ProjectFeature
.where(project_id: feature['project_id'])
.where('id <> ?', feature['max'])
.each_batch { |batch| batch.delete_all }
end
end
end

View file

@ -0,0 +1,21 @@
class AddNotNullConstraintToProjectFeaturesProjectId < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
class ProjectFeature < ActiveRecord::Base
include EachBatch
self.table_name = 'project_features'
end
def up
ProjectFeature.where(project_id: nil).delete_all
change_column_null :project_features, :project_id, false
end
def down
change_column_null :project_features, :project_id, true
end
end

View file

@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180511090724) do
ActiveRecord::Schema.define(version: 20180512061621) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@ -1494,7 +1494,7 @@ ActiveRecord::Schema.define(version: 20180511090724) do
add_index "project_deploy_tokens", ["project_id", "deploy_token_id"], name: "index_project_deploy_tokens_on_project_id_and_deploy_token_id", unique: true, using: :btree
create_table "project_features", force: :cascade do |t|
t.integer "project_id"
t.integer "project_id", null: false
t.integer "merge_requests_access_level"
t.integer "issues_access_level"
t.integer "wiki_access_level"
@ -1505,7 +1505,7 @@ ActiveRecord::Schema.define(version: 20180511090724) do
t.integer "repository_access_level", default: 20, null: false
end
add_index "project_features", ["project_id"], name: "index_project_features_on_project_id", using: :btree
add_index "project_features", ["project_id"], name: "index_project_features_on_project_id", unique: true, using: :btree
create_table "project_group_links", force: :cascade do |t|
t.integer "project_id", null: false

View file

@ -28,7 +28,7 @@ module Gitlab
IMPORTED_OBJECT_MAX_RETRIES = 5.freeze
EXISTING_OBJECT_CHECK = %i[milestone milestones label labels project_label project_labels group_label group_labels].freeze
EXISTING_OBJECT_CHECK = %i[milestone milestones label labels project_label project_labels group_label group_labels project_feature].freeze
TOKEN_RESET_MODELS = %w[Ci::Trigger Ci::Build ProjectHook].freeze

View file

@ -0,0 +1,59 @@
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20180511174224_add_unique_constraint_to_project_features_project_id.rb')
describe AddUniqueConstraintToProjectFeaturesProjectId, :migration do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:features) { table(:project_features) }
let(:migration) { described_class.new }
describe '#up' do
before do
(1..3).each do |i|
namespaces.create(id: i, name: "ns-test-#{i}", path: "ns-test-i#{i}")
projects.create!(id: i, name: "test-#{i}", path: "test-#{i}", namespace_id: i)
end
features.create!(id: 1, project_id: 1)
features.create!(id: 2, project_id: 1)
features.create!(id: 3, project_id: 2)
features.create!(id: 4, project_id: 2)
features.create!(id: 5, project_id: 2)
features.create!(id: 6, project_id: 3)
end
it 'creates a unique index and removes duplicates' do
expect(migration.index_exists?(:project_features, :project_id, unique: false, name: 'index_project_features_on_project_id')).to be true
expect { migration.up }.to change { features.count }.from(6).to(3)
expect(migration.index_exists?(:project_features, :project_id, unique: true, name: 'index_project_features_on_project_id')).to be true
expect(migration.index_exists?(:project_features, :project_id, name: 'index_project_features_on_project_id_unique')).to be false
project_1_features = features.where(project_id: 1)
expect(project_1_features.count).to eq(1)
expect(project_1_features.first.id).to eq(2)
project_2_features = features.where(project_id: 2)
expect(project_2_features.count).to eq(1)
expect(project_2_features.first.id).to eq(5)
project_3_features = features.where(project_id: 3)
expect(project_3_features.count).to eq(1)
expect(project_3_features.first.id).to eq(6)
end
end
describe '#down' do
it 'restores the original index' do
migration.up
expect(migration.index_exists?(:project_features, :project_id, unique: true, name: 'index_project_features_on_project_id')).to be true
migration.down
expect(migration.index_exists?(:project_features, :project_id, unique: false, name: 'index_project_features_on_project_id')).to be true
expect(migration.index_exists?(:project_features, :project_id, name: 'index_project_features_on_project_id_old')).to be false
end
end
end

View file

@ -1,9 +1,9 @@
require 'spec_helper'
describe Guest do
let(:public_project) { build_stubbed(:project, :public) }
let(:private_project) { build_stubbed(:project, :private) }
let(:internal_project) { build_stubbed(:project, :internal) }
set(:public_project) { create(:project, :public) }
set(:private_project) { create(:project, :private) }
set(:internal_project) { create(:project, :internal) }
describe '.can_pull?' do
context 'when project is private' do