diff --git a/bin/parallel-rsync-repos b/bin/parallel-rsync-repos index b777056c95f..21921148fa0 100755 --- a/bin/parallel-rsync-repos +++ b/bin/parallel-rsync-repos @@ -1,31 +1,33 @@ -#!/bin/sh -# this script should run as the 'git' user, not root, because of mkdir +#!/usr/bin/env bash +# this script should run as the 'git' user, not root, because 'root' should not +# own intermediate directories created by rsync. # # Example invocation: # find /var/opt/gitlab/git-data/repositories -maxdepth 2 | \ -# parallel-rsync-repos /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories +# parallel-rsync-repos transfer-success.log /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories # # You can also rsync to a remote destination. # -# parallel-rsync-repos /var/opt/gitlab/git-data/repositories user@host:/mnt/gitlab/repositories +# parallel-rsync-repos transfer-success.log /var/opt/gitlab/git-data/repositories user@host:/mnt/gitlab/repositories # # If you need to pass extra options to rsync, set the RSYNC variable # -# env RSYNC='rsync --rsh="foo bar"' parallel-rsync-repos /src dest +# env RSYNC='rsync --rsh="foo bar"' parallel-rsync-repos transfer-success.log /src dest # -SRC=$1 -DEST=$2 +LOGFILE=$1 +SRC=$2 +DEST=$3 + +if [ -z "$LOGFILE" ] || [ -z "$SRC" ] || [ -z "$DEST" ] ; then + echo "Usage: $0 LOGFILE SRC DEST" + exit 1 +fi if [ -z "$JOBS" ] ; then JOBS=10 fi -if [ -z "$SRC" ] || [ -z "$DEST" ] ; then - echo "Usage: $0 SRC DEST" - exit 1 -fi - if [ -z "$RSYNC" ] ; then RSYNC=rsync fi @@ -35,5 +37,20 @@ if ! cd $SRC ; then exit 1 fi -sed "s|$SRC|./|" |\ - parallel -j$JOBS --progress "mkdir -p $DEST/{} && $RSYNC --delete -a {}/. $DEST/{}/" +# rsyncjob DIR: rsync DIR (a path under $SRC) to $DEST, relative to $SRC. +# Appends DIR to $LOGFILE on success; prints a message and returns 1 on failure. +rsyncjob() { + local relative_dir="./${1#"$SRC"}" + + if ! 
$RSYNC --delete --relative -a "$relative_dir" "$DEST" ; then + echo "rsync $1 failed" + return 1 + fi + + echo "$1" >> "$LOGFILE" +} + +export LOGFILE SRC DEST RSYNC +export -f rsyncjob + +parallel -j"$JOBS" --progress rsyncjob diff --git a/doc/operations/moving_repositories.md b/doc/operations/moving_repositories.md index a89602b367f..39086b7a251 100644 --- a/doc/operations/moving_repositories.md +++ b/doc/operations/moving_repositories.md @@ -96,25 +96,59 @@ after switching to the new repository storage directory. ### Parallel rsync for all repositories known to GitLab -This will sync repositories with 10 rsync processes at a time. +This will sync repositories with 10 rsync processes at a time. We keep +track of progress so that the transfer can be restarted if necessary. + +First we create a new directory, owned by 'git', to hold transfer +logs. We assume the directory is empty before we start the transfer +procedure, and that we are the only ones writing files in it. ``` # Omnibus -sudo gitlab-rake gitlab:list_repos |\ - sudo -u git \ - /usr/bin/env JOBS=10 \ - /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \ - /var/opt/gitlab/git-data/repositories \ - /mnt/gitlab/repositories +sudo mkdir /var/opt/gitlab/transfer-logs +sudo chown git:git /var/opt/gitlab/transfer-logs + +# Source +sudo -u git -H mkdir /home/git/transfer-logs +``` + +We seed the process with a list of the directories we want to copy. + +``` +# Omnibus +sudo -u git sh -c 'gitlab-rake gitlab:list_repos > /var/opt/gitlab/transfer-logs/all-repos-$(date +%s).txt' # Source cd /home/git/gitlab -sudo -u git -H bundle exec rake gitlab:list_repos |\ - sudo -u git -H \ +sudo -u git -H sh -c 'bundle exec rake gitlab:list_repos > /home/git/transfer-logs/all-repos-$(date +%s).txt' +``` + +Now we can start the transfer. The command below is idempotent, and +the number of jobs done by GNU Parallel should converge to zero. 
If it +does not, some repositories listed in all-repos-1234.txt may have been +deleted/renamed before they could be copied. + +``` +# Omnibus +sudo -u git sh -c ' +cat /var/opt/gitlab/transfer-logs/* | sort | uniq -u |\ + /usr/bin/env JOBS=10 \ + /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repos \ + /var/opt/gitlab/transfer-logs/success-$(date +%s).log \ + /var/opt/gitlab/git-data/repositories \ + /mnt/gitlab/repositories +' + +# Source +cd /home/git/gitlab +sudo -u git -H sh -c ' +cat /home/git/transfer-logs/* | sort | uniq -u |\ /usr/bin/env JOBS=10 \ bin/parallel-rsync-repos \ + /home/git/transfer-logs/success-$(date +%s).log \ /home/git/repositories \ /mnt/gitlab/repositories +' ``` ### Parallel rsync only for repositories with recent activity @@ -129,7 +163,8 @@ gitlab:list_repos' to only print repositories with recent activity. sudo gitlab-rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\ sudo -u git \ /usr/bin/env JOBS=10 \ - /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \ + /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repos \ + success-$(date +%s).log \ /var/opt/gitlab/git-data/repositories \ /mnt/gitlab/repositories @@ -139,6 +174,7 @@ sudo -u git -H bundle exec rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\ sudo -u git -H \ /usr/bin/env JOBS=10 \ bin/parallel-rsync-repos \ + success-$(date +%s).log \ /home/git/repositories \ /mnt/gitlab/repositories ```