diff --git a/changelogs/unreleased/feature-hashed-storage-repo-import.yml b/changelogs/unreleased/feature-hashed-storage-repo-import.yml new file mode 100644 index 00000000000..73c16a99053 --- /dev/null +++ b/changelogs/unreleased/feature-hashed-storage-repo-import.yml @@ -0,0 +1,5 @@ +--- +title: Improve GitLab Import rake task to work with Hashed Storage and Subgroups +merge_request: +author: +type: changed diff --git a/doc/raketasks/import.md b/doc/raketasks/import.md index 2b305cb5c99..97e9b36d1a6 100644 --- a/doc/raketasks/import.md +++ b/doc/raketasks/import.md @@ -3,49 +3,47 @@ ## Notes - The owner of the project will be the first admin -- The groups will be created as needed +- The groups will be created as needed, including subgroups - The owner of the group will be the first admin - Existing projects will be skipped +- The existing Git repos will be moved from disk (removed from the original path) ## How to use -### Create a new folder inside the git repositories path. This will be the name of the new group. +### Create a new folder to import your Git repositories from. -- For omnibus-gitlab, it is located at: `/var/opt/gitlab/git-data/repositories` by default, unless you changed -it in the `/etc/gitlab/gitlab.rb` file. -- For installations from source, it is usually located at: `/home/git/repositories` or you can see where -your repositories are located by looking at `config/gitlab.yml` under the `repositories => storages` entries -(you'll usually use the `default` storage path to start). - -New folder needs to have git user ownership and read/write/execute access for git user and its group: +The new folder needs to have git user ownership and read/write/execute access for git user and its group: ``` -sudo -u git mkdir /var/opt/gitlab/git-data/repositories/new_group +sudo -u git mkdir /var/opt/gitlab/git-data/repository-import-/new_group ``` -If you are using an installation from source, replace `/var/opt/gitlab/git-data` -with `/home/git`. - ### Copy your bare repositories inside this newly created folder: +- Any .git repositories found on any of the subfolders will be imported as projects +- Groups will be created as needed, these could be nested folders. Example: + +If we copy the repos to `/var/opt/gitlab/git-data/repository-import-`, and repo A needs to be under the groups G1 and G2, it will +have to be created under those folders: `/var/opt/gitlab/git-data/repository-import-/G1/G2/A.git`. + + ``` -sudo cp -r /old/git/foo.git /var/opt/gitlab/git-data/repositories/new_group/ +sudo cp -r /old/git/foo.git /var/opt/gitlab/git-data/repository-import-/new_group/ # Do this once when you are done copying git repositories -sudo chown -R git:git /var/opt/gitlab/git-data/repositories/new_group/ +sudo chown -R git:git /var/opt/gitlab/git-data/repository-import- ``` `foo.git` needs to be owned by the git user and git users group. -If you are using an installation from source, replace `/var/opt/gitlab/git-data` -with `/home/git`. +If you are using an installation from source, replace `/var/opt/gitlab/` with `/home/git`. ### Run the command below depending on your type of installation: #### Omnibus Installation ``` -$ sudo gitlab-rake gitlab:import:repos +$ sudo gitlab-rake gitlab:import:repos['/var/opt/gitlab/git-data/repository-import-'] ``` #### Installation from source @@ -54,16 +52,21 @@ Before running this command you need to change the directory to where your GitLa ``` $ cd /home/git/gitlab -$ sudo -u git -H bundle exec rake gitlab:import:repos RAILS_ENV=production +$ sudo -u git -H bundle exec rake gitlab:import:repos['/var/opt/gitlab/git-data/repository-import-'] RAILS_ENV=production ``` #### Example output ``` -Processing abcd.git +Processing /var/opt/gitlab/git-data/repository-import-1/a/b/c/blah.git + * Using namespace: a/b/c + * Created blah (a/b/c/blah) + * Skipping repo /var/opt/gitlab/git-data/repository-import-1/a/b/c/blah.wiki.git +Processing /var/opt/gitlab/git-data/repository-import-1/abcd.git * Created abcd (abcd.git) -Processing group/xyz.git - * Created Group group (2) +Processing /var/opt/gitlab/git-data/repository-import-1/group/xyz.git + * Using namespace: group (2) * Created xyz (group/xyz.git) + * Skipping repo /var/opt/gitlab/git-data/repository-import-1/@shared/a/b/abcd.git [...] ``` diff --git a/lib/gitlab/bare_repository_import/importer.rb b/lib/gitlab/bare_repository_import/importer.rb new file mode 100644 index 00000000000..196de667805 --- /dev/null +++ b/lib/gitlab/bare_repository_import/importer.rb @@ -0,0 +1,101 @@ +module Gitlab + module BareRepositoryImport + class Importer + NoAdminError = Class.new(StandardError) + + def self.execute(import_path) + import_path << '/' unless import_path.ends_with?('/') + repos_to_import = Dir.glob(import_path + '**/*.git') + + unless user = User.admins.order_id_asc.first + raise NoAdminError.new('No admin user found to import repositories') + end + + repos_to_import.each do |repo_path| + bare_repo = Gitlab::BareRepositoryImport::Repository.new(import_path, repo_path) + + if bare_repo.hashed? || bare_repo.wiki? + log " * Skipping repo #{bare_repo.repo_path}".color(:yellow) + + next + end + + log "Processing #{repo_path}".color(:yellow) + + new(user, bare_repo).create_project_if_needed + end + end + + attr_reader :user, :project_name, :bare_repo + + delegate :log, to: :class + delegate :project_name, :project_full_path, :group_path, :repo_path, :wiki_path, to: :bare_repo + + def initialize(user, bare_repo) + @user = user + @bare_repo = bare_repo + end + + def create_project_if_needed + if project = Project.find_by_full_path(project_full_path) + log " * #{project.name} (#{project_full_path}) exists" + + return project + end + + create_project + end + + private + + def create_project + group = find_or_create_groups + + project = Projects::CreateService.new(user, + name: project_name, + path: project_name, + skip_disk_validation: true, + namespace_id: group&.id).execute + + if project.persisted? && mv_repo(project) + log " * Created #{project.name} (#{project_full_path})".color(:green) + + ProjectCacheWorker.perform_async(project.id) + else + log " * Failed trying to create #{project.name} (#{project_full_path})".color(:red) + log " Errors: #{project.errors.messages}".color(:red) if project.errors.any? + end + + project + end + + def mv_repo(project) + FileUtils.mv(repo_path, File.join(project.repository_storage_path, project.disk_path + '.git')) + + if bare_repo.wiki_exists? + FileUtils.mv(wiki_path, File.join(project.repository_storage_path, project.disk_path + '.wiki.git')) + end + + true + rescue => e + log " * Failed to move repo: #{e.message}".color(:red) + + false + end + + def find_or_create_groups + return nil unless group_path.present? + + log " * Using namespace: #{group_path}" + + Groups::NestedCreateService.new(user, group_path: group_path).execute + end + + # This is called from within a rake task only used by Admins, so allow writing + # to STDOUT + def self.log(message) + puts message # rubocop:disable Rails/Output + end + end + end +end diff --git a/lib/gitlab/bare_repository_import/repository.rb b/lib/gitlab/bare_repository_import/repository.rb new file mode 100644 index 00000000000..8574ac6eb30 --- /dev/null +++ b/lib/gitlab/bare_repository_import/repository.rb @@ -0,0 +1,42 @@ +module Gitlab + module BareRepositoryImport + class Repository + attr_reader :group_path, :project_name, :repo_path + + def initialize(root_path, repo_path) + @root_path = root_path + @repo_path = repo_path + + # Split path into 'all/the/namespaces' and 'project_name' + @group_path, _, @project_name = repo_relative_path.rpartition('/') + end + + def wiki_exists? + File.exist?(wiki_path) + end + + def wiki? + @wiki ||= repo_path.end_with?('.wiki.git') + end + + def wiki_path + @wiki_path ||= repo_path.sub(/\.git$/, '.wiki.git') + end + + def hashed? + @hashed ||= group_path.start_with?('@hashed') + end + + def project_full_path + @project_full_path ||= "#{group_path}/#{project_name}" + end + + private + + def repo_relative_path + # Remove root path and `.git` at the end + repo_path[@root_path.size...-4] + end + end + end +end diff --git a/lib/gitlab/bare_repository_importer.rb b/lib/gitlab/bare_repository_importer.rb deleted file mode 100644 index 1d98d187805..00000000000 --- a/lib/gitlab/bare_repository_importer.rb +++ /dev/null @@ -1,97 +0,0 @@ -module Gitlab - class BareRepositoryImporter - NoAdminError = Class.new(StandardError) - - def self.execute - Gitlab.config.repositories.storages.each do |storage_name, repository_storage| - git_base_path = repository_storage['path'] - repos_to_import = Dir.glob(git_base_path + '/**/*.git') - - repos_to_import.each do |repo_path| - if repo_path.end_with?('.wiki.git') - log " * Skipping wiki repo" - next - end - - log "Processing #{repo_path}".color(:yellow) - - repo_relative_path = repo_path[repository_storage['path'].length..-1] - .sub(/^\//, '') # Remove leading `/` - .sub(/\.git$/, '') # Remove `.git` at the end - new(storage_name, repo_relative_path).create_project_if_needed - end - end - end - - attr_reader :storage_name, :full_path, :group_path, :project_path, :user - delegate :log, to: :class - - def initialize(storage_name, repo_path) - @storage_name = storage_name - @full_path = repo_path - - unless @user = User.admins.order_id_asc.first - raise NoAdminError.new('No admin user found to import repositories') - end - - @group_path, @project_path = File.split(repo_path) - @group_path = nil if @group_path == '.' - end - - def create_project_if_needed - if project = Project.find_by_full_path(full_path) - log " * #{project.name} (#{full_path}) exists" - return project - end - - create_project - end - - private - - def create_project - group = find_or_create_group - - project_params = { - name: project_path, - path: project_path, - repository_storage: storage_name, - namespace_id: group&.id, - skip_disk_validation: true - } - - project = Projects::CreateService.new(user, project_params).execute - - if project.persisted? - log " * Created #{project.name} (#{full_path})".color(:green) - ProjectCacheWorker.perform_async(project.id) - else - log " * Failed trying to create #{project.name} (#{full_path})".color(:red) - log " Errors: #{project.errors.messages}".color(:red) - end - - project - end - - def find_or_create_group - return nil unless group_path - - if namespace = Namespace.find_by_full_path(group_path) - log " * Namespace #{group_path} exists.".color(:green) - return namespace - end - - log " * Creating Group: #{group_path}" - Groups::NestedCreateService.new(user, group_path: group_path).execute - end - - # This is called from within a rake task only used by Admins, so allow writing - # to STDOUT - # - # rubocop:disable Rails/Output - def self.log(message) - puts message - end - # rubocop:enable Rails/Output - end -end diff --git a/lib/tasks/gitlab/import.rake b/lib/tasks/gitlab/import.rake index d227a0c8bdb..adfcc3cda22 100644 --- a/lib/tasks/gitlab/import.rake +++ b/lib/tasks/gitlab/import.rake @@ -2,23 +2,21 @@ namespace :gitlab do namespace :import do # How to use: # - # 1. copy the bare repos under the repository storage paths (commonly the default path is /home/git/repositories) - # 2. run: bundle exec rake gitlab:import:repos RAILS_ENV=production + # 1. copy the bare repos to a specific path that contain the group or subgroups structure as folders + # 2. run: bundle exec rake gitlab:import:repos[/path/to/repos] RAILS_ENV=production # # Notes: # * The project owner will set to the first administator of the system # * Existing projects will be skipped - # - # desc "GitLab | Import bare repositories from repositories -> storages into GitLab project instance" - task repos: :environment do - if Project.current_application_settings.hashed_storage_enabled - puts 'Cannot import repositories when Hashed Storage is enabled'.color(:red) + task :repos, [:import_path] => :environment do |_t, args| + unless args.import_path + puts 'Please specify an import path that contains the repositories'.color(:red) exit 1 end - Gitlab::BareRepositoryImporter.execute + Gitlab::BareRepositoryImport::Importer.execute(args.import_path) end end end diff --git a/spec/lib/gitlab/bare_repository_import/importer_spec.rb b/spec/lib/gitlab/bare_repository_import/importer_spec.rb new file mode 100644 index 00000000000..7f3bf5fc41c --- /dev/null +++ b/spec/lib/gitlab/bare_repository_import/importer_spec.rb @@ -0,0 +1,168 @@ +require 'spec_helper' + +describe Gitlab::BareRepositoryImport::Importer, repository: true do + let!(:admin) { create(:admin) } + let!(:base_dir) { Dir.mktmpdir + '/' } + let(:bare_repository) { Gitlab::BareRepositoryImport::Repository.new(base_dir, File.join(base_dir, "#{project_path}.git")) } + + subject(:importer) { described_class.new(admin, bare_repository) } + + before do + allow(described_class).to receive(:log) + end + + after do + FileUtils.rm_rf(base_dir) + end + + shared_examples 'importing a repository' do + describe '.execute' do + it 'creates a project for a repository in storage' do + FileUtils.mkdir_p(File.join(base_dir, "#{project_path}.git")) + fake_importer = double + + expect(described_class).to receive(:new).and_return(fake_importer) + expect(fake_importer).to receive(:create_project_if_needed) + + described_class.execute(base_dir) + end + + it 'skips wiki repos' do + repo_dir = File.join(base_dir, 'the-group', 'the-project.wiki.git') + FileUtils.mkdir_p(File.join(repo_dir)) + + expect(described_class).to receive(:log).with(" * Skipping repo #{repo_dir}") + expect(described_class).not_to receive(:new) + + described_class.execute(base_dir) + end + + context 'without admin users' do + let(:admin) { nil } + + it 'raises an error' do + expect { described_class.execute(base_dir) }.to raise_error(Gitlab::BareRepositoryImport::Importer::NoAdminError) + end + end + end + + describe '#create_project_if_needed' do + it 'starts an import for a project that did not exist' do + expect(importer).to receive(:create_project) + + importer.create_project_if_needed + end + + it 'skips importing when the project already exists' do + project = create(:project, path: 'a-project', namespace: existing_group) + + expect(importer).not_to receive(:create_project) + expect(importer).to receive(:log).with(" * #{project.name} (#{project_path}) exists") + + importer.create_project_if_needed + end + + it 'creates a project with the correct path in the database' do + importer.create_project_if_needed + + expect(Project.find_by_full_path(project_path)).not_to be_nil + end + + it 'creates the Git repo in disk' do + FileUtils.mkdir_p(File.join(base_dir, "#{project_path}.git")) + + importer.create_project_if_needed + + project = Project.find_by_full_path(project_path) + + expect(File).to exist(File.join(project.repository_storage_path, project.disk_path + '.git')) + end + + context 'hashed storage enabled' do + it 'creates a project with the correct path in the database' do + stub_application_setting(hashed_storage_enabled: true) + + importer.create_project_if_needed + + expect(Project.find_by_full_path(project_path)).not_to be_nil + end + end + end + end + + context 'with subgroups', :nested_groups do + let(:project_path) { 'a-group/a-sub-group/a-project' } + + let(:existing_group) do + group = create(:group, path: 'a-group') + create(:group, path: 'a-sub-group', parent: group) + end + + it_behaves_like 'importing a repository' + end + + context 'without subgroups' do + let(:project_path) { 'a-group/a-project' } + let(:existing_group) { create(:group, path: 'a-group') } + + it_behaves_like 'importing a repository' + end + + context 'without groups' do + let(:project_path) { 'a-project' } + + it 'starts an import for a project that did not exist' do + expect(importer).to receive(:create_project) + + importer.create_project_if_needed + end + + it 'creates a project with the correct path in the database' do + importer.create_project_if_needed + + expect(Project.find_by_full_path("#{admin.full_path}/#{project_path}")).not_to be_nil + end + + it 'creates the Git repo in disk' do + FileUtils.mkdir_p(File.join(base_dir, "#{project_path}.git")) + + importer.create_project_if_needed + + project = Project.find_by_full_path("#{admin.full_path}/#{project_path}") + + expect(File).to exist(File.join(project.repository_storage_path, project.disk_path + '.git')) + end + end + + context 'with Wiki' do + let(:project_path) { 'a-group/a-project' } + let(:existing_group) { create(:group, path: 'a-group') } + + it_behaves_like 'importing a repository' + + it 'creates the Wiki git repo in disk' do + FileUtils.mkdir_p(File.join(base_dir, "#{project_path}.git")) + FileUtils.mkdir_p(File.join(base_dir, "#{project_path}.wiki.git")) + + importer.create_project_if_needed + + project = Project.find_by_full_path(project_path) + + expect(File).to exist(File.join(project.repository_storage_path, project.disk_path + '.wiki.git')) + end + end + + context 'when subgroups are not available' do + let(:project_path) { 'a-group/a-sub-group/a-project' } + + before do + expect(Group).to receive(:supports_nested_groups?) { false } + end + + describe '#create_project_if_needed' do + it 'raises an error' do + expect { importer.create_project_if_needed }.to raise_error('Nested groups are not supported on MySQL') + end + end + end +end diff --git a/spec/lib/gitlab/bare_repository_import/repository_spec.rb b/spec/lib/gitlab/bare_repository_import/repository_spec.rb new file mode 100644 index 00000000000..2db737f5fb6 --- /dev/null +++ b/spec/lib/gitlab/bare_repository_import/repository_spec.rb @@ -0,0 +1,51 @@ +require 'spec_helper' + +describe ::Gitlab::BareRepositoryImport::Repository do + let(:project_repo_path) { described_class.new('/full/path/', '/full/path/to/repo.git') } + + it 'stores the repo path' do + expect(project_repo_path.repo_path).to eq('/full/path/to/repo.git') + end + + it 'stores the group path' do + expect(project_repo_path.group_path).to eq('to') + end + + it 'stores the project name' do + expect(project_repo_path.project_name).to eq('repo') + end + + it 'stores the wiki path' do + expect(project_repo_path.wiki_path).to eq('/full/path/to/repo.wiki.git') + end + + describe '#wiki?' do + it 'returns true if it is a wiki' do + wiki_path = described_class.new('/full/path/', '/full/path/to/a/b/my.wiki.git') + + expect(wiki_path.wiki?).to eq(true) + end + + it 'returns false if it is not a wiki' do + expect(project_repo_path.wiki?).to eq(false) + end + end + + describe '#hashed?' do + it 'returns true if it is a hashed folder' do + path = described_class.new('/full/path/', '/full/path/@hashed/my.repo.git') + + expect(path.hashed?).to eq(true) + end + + it 'returns false if it is not a hashed folder' do + expect(project_repo_path.hashed?).to eq(false) + end + end + + describe '#project_full_path' do + it 'returns the project full path' do + expect(project_repo_path.repo_path).to eq('/full/path/to/repo.git') + end + end +end diff --git a/spec/lib/gitlab/bare_repository_importer_spec.rb b/spec/lib/gitlab/bare_repository_importer_spec.rb deleted file mode 100644 index 36d1844b5b1..00000000000 --- a/spec/lib/gitlab/bare_repository_importer_spec.rb +++ /dev/null @@ -1,100 +0,0 @@ -require 'spec_helper' - -describe Gitlab::BareRepositoryImporter, repository: true do - subject(:importer) { described_class.new('default', project_path) } - - let!(:admin) { create(:admin) } - - before do - allow(described_class).to receive(:log) - end - - shared_examples 'importing a repository' do - describe '.execute' do - it 'creates a project for a repository in storage' do - FileUtils.mkdir_p(File.join(TestEnv.repos_path, "#{project_path}.git")) - fake_importer = double - - expect(described_class).to receive(:new).with('default', project_path) - .and_return(fake_importer) - expect(fake_importer).to receive(:create_project_if_needed) - - described_class.execute - end - - it 'skips wiki repos' do - FileUtils.mkdir_p(File.join(TestEnv.repos_path, 'the-group', 'the-project.wiki.git')) - - expect(described_class).to receive(:log).with(' * Skipping wiki repo') - expect(described_class).not_to receive(:new) - - described_class.execute - end - end - - describe '#initialize' do - context 'without admin users' do - let(:admin) { nil } - - it 'raises an error' do - expect { importer }.to raise_error(Gitlab::BareRepositoryImporter::NoAdminError) - end - end - end - - describe '#create_project_if_needed' do - it 'starts an import for a project that did not exist' do - expect(importer).to receive(:create_project) - - importer.create_project_if_needed - end - - it 'skips importing when the project already exists' do - project = create(:project, path: 'a-project', namespace: existing_group) - - expect(importer).not_to receive(:create_project) - expect(importer).to receive(:log).with(" * #{project.name} (#{project_path}) exists") - - importer.create_project_if_needed - end - - it 'creates a project with the correct path in the database' do - importer.create_project_if_needed - - expect(Project.find_by_full_path(project_path)).not_to be_nil - end - end - end - - context 'with subgroups', :nested_groups do - let(:project_path) { 'a-group/a-sub-group/a-project' } - - let(:existing_group) do - group = create(:group, path: 'a-group') - create(:group, path: 'a-sub-group', parent: group) - end - - it_behaves_like 'importing a repository' - end - - context 'without subgroups' do - let(:project_path) { 'a-group/a-project' } - let(:existing_group) { create(:group, path: 'a-group') } - - it_behaves_like 'importing a repository' - end - - context 'when subgroups are not available' do - let(:project_path) { 'a-group/a-sub-group/a-project' } - - before do - expect(Group).to receive(:supports_nested_groups?) { false } - end - - describe '#create_project_if_needed' do - it 'raises an error' do - expect { importer.create_project_if_needed }.to raise_error('Nested groups are not supported on MySQL') - end - end - end -end