Add 'resume' capability to parallel-rsync-repos
This commit is contained in:
parent
6d2be0212c
commit
f3ca92a062
2 changed files with 75 additions and 24 deletions
|
@ -1,31 +1,33 @@
|
|||
#!/bin/sh
|
||||
# this script should run as the 'git' user, not root, because of mkdir
|
||||
#!/usr/bin/env bash
|
||||
# this script should run as the 'git' user, not root, because 'root' should not
|
||||
# own intermediate directories created by rsync.
|
||||
#
|
||||
# Example invocation:
|
||||
# find /var/opt/gitlab/git-data/repositories -maxdepth 2 | \
|
||||
# parallel-rsync-repos /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories
|
||||
# parallel-rsync-repos transfer-success.log /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories
|
||||
#
|
||||
# You can also rsync to a remote destination.
|
||||
#
|
||||
# parallel-rsync-repos /var/opt/gitlab/git-data/repositories user@host:/mnt/gitlab/repositories
|
||||
# parallel-rsync-repos transfer-success.log /var/opt/gitlab/git-data/repositories user@host:/mnt/gitlab/repositories
|
||||
#
|
||||
# If you need to pass extra options to rsync, set the RSYNC variable
|
||||
#
|
||||
# env RSYNC='rsync --rsh="foo bar"' parallel-rsync-repos /src dest
|
||||
# env RSYNC='rsync --rsh="foo bar"' parallel-rsync-repos transfer-success.log /src dest
|
||||
#
|
||||
|
||||
SRC=$1
|
||||
DEST=$2
|
||||
LOGFILE=$1
|
||||
SRC=$2
|
||||
DEST=$3
|
||||
|
||||
if [ -z "$LOGFILE" ] || [ -z "$SRC" ] || [ -z "$DEST" ] ; then
|
||||
echo "Usage: $0 LOGFILE SRC DEST"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$JOBS" ] ; then
|
||||
JOBS=10
|
||||
fi
|
||||
|
||||
if [ -z "$SRC" ] || [ -z "$DEST" ] ; then
|
||||
echo "Usage: $0 SRC DEST"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$RSYNC" ] ; then
|
||||
RSYNC=rsync
|
||||
fi
|
||||
|
@ -35,5 +37,18 @@ if ! cd $SRC ; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
sed "s|$SRC|./|" |\
|
||||
parallel -j$JOBS --progress "mkdir -p $DEST/{} && $RSYNC --delete -a {}/. $DEST/{}/"
|
||||
#######################################
# Copy one directory from $SRC to $DEST with rsync and, on success,
# record it in the transfer log.
# Globals:   SRC, DEST, RSYNC, LOGFILE (all read)
# Arguments: $1 - path of the directory to copy, normally prefixed by $SRC
# Outputs:   appends $1 to $LOGFILE on success;
#            writes "rsync $1 failed" to stderr on failure
# Returns:   0 on success, 1 if rsync failed
#######################################
rsyncjob() {
  local relative_dir

  # Strip the leading $SRC from the argument. The inner quotes make the
  # prefix match literally, so glob characters in $SRC are not treated as
  # a pattern.
  relative_dir="./${1#"$SRC"}"

  # $RSYNC is deliberately left unquoted so that extra options stored in it
  # are split into separate words. NOTE: word splitting does not re-parse
  # quotes embedded in the value.
  if ! $RSYNC --delete --relative -a "$relative_dir" "$DEST" ; then
    echo "rsync $1 failed" >&2
    return 1
  fi

  # Only successfully copied directories are logged.
  printf '%s\n' "$1" >> "$LOGFILE"
}
|
||||
|
||||
# Export the settings and the job function so that the shells started by
# GNU Parallel can see them.
export LOGFILE SRC DEST RSYNC
export -f rsyncjob

# Run rsyncjob once per input line, with at most $JOBS jobs at a time.
parallel -j"$JOBS" --progress rsyncjob
|
||||
|
|
|
@ -96,25 +96,59 @@ after switching to the new repository storage directory.
|
|||
|
||||
### Parallel rsync for all repositories known to GitLab
|
||||
|
||||
This will sync repositories with 10 rsync processes at a time.
|
||||
This will sync repositories with 10 rsync processes at a time. We keep
|
||||
track of progress so that the transfer can be restarted if necessary.
|
||||
|
||||
First we create a new directory, owned by 'git', to hold transfer
|
||||
logs. We assume the directory is empty before we start the transfer
|
||||
procedure, and that we are the only ones writing files in it.
|
||||
|
||||
```
|
||||
# Omnibus
|
||||
sudo gitlab-rake gitlab:list_repos |\
|
||||
sudo -u git \
|
||||
/usr/bin/env JOBS=10 \
|
||||
/opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \
|
||||
/var/opt/gitlab/git-data/repositories \
|
||||
/mnt/gitlab/repositories
|
||||
sudo mkdir /var/opt/gitlab/transfer-logs
|
||||
sudo chown git:git /var/opt/gitlab/transfer-logs
|
||||
|
||||
# Source
|
||||
sudo -u git -H mkdir /home/git/transfer-logs
|
||||
```
|
||||
|
||||
We seed the process with a list of the directories we want to copy.
|
||||
|
||||
```
|
||||
# Omnibus
|
||||
sudo -u git sh -c 'gitlab-rake gitlab:list_repos > /var/opt/gitlab/transfer-logs/all-repos-$(date +%s).txt'
|
||||
|
||||
# Source
|
||||
cd /home/git/gitlab
|
||||
sudo -u git -H bundle exec rake gitlab:list_repos |\
|
||||
sudo -u git -H \
|
||||
sudo -u git -H sh -c 'bundle exec rake gitlab:list_repos > /home/git/transfer-logs/all-repos-$(date +%s).txt'
|
||||
```
|
||||
|
||||
Now we can start the transfer. The command below is idempotent, and
|
||||
the number of jobs done by GNU Parallel should converge to zero. If it
|
||||
does not, some repositories listed in all-repos-1234.txt may have been
|
||||
deleted/renamed before they could be copied.
|
||||
|
||||
```
|
||||
# Omnibus
|
||||
sudo -u git sh -c '
|
||||
cat /var/opt/gitlab/transfer-logs/* | sort | uniq -u |\
|
||||
/usr/bin/env JOBS=10 \
|
||||
/opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repos \
|
||||
/var/opt/gitlab/transfer-logs/success-$(date +%s).log \
|
||||
/var/opt/gitlab/git-data/repositories \
|
||||
/mnt/gitlab/repositories
|
||||
'
|
||||
|
||||
# Source
|
||||
cd /home/git/gitlab
|
||||
sudo -u git -H sh -c '
|
||||
cat /home/git/transfer-logs/* | sort | uniq -u |\
|
||||
/usr/bin/env JOBS=10 \
|
||||
bin/parallel-rsync-repos \
|
||||
/home/git/transfer-logs/success-$(date +%s).log \
|
||||
/home/git/repositories \
|
||||
/mnt/gitlab/repositories
|
||||
'
|
||||
```
|
||||
|
||||
### Parallel rsync only for repositories with recent activity
|
||||
|
@ -129,7 +163,8 @@ gitlab:list_repos' to only print repositories with recent activity.
|
|||
sudo gitlab-rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\
|
||||
sudo -u git \
|
||||
/usr/bin/env JOBS=10 \
|
||||
/opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \
|
||||
/opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repos \
|
||||
success-$(date +%s).log \
|
||||
/var/opt/gitlab/git-data/repositories \
|
||||
/mnt/gitlab/repositories
|
||||
|
||||
|
@ -139,6 +174,7 @@ sudo -u git -H bundle exec rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\
|
|||
sudo -u git -H \
|
||||
/usr/bin/env JOBS=10 \
|
||||
bin/parallel-rsync-repos \
|
||||
success-$(date +%s).log \
|
||||
/home/git/repositories \
|
||||
/mnt/gitlab/repositories
|
||||
```
|
||||
|
|
Loading…
Reference in a new issue