diff --git a/changelogs/unreleased/26881-backup-fails-if-data-changes.yml b/changelogs/unreleased/26881-backup-fails-if-data-changes.yml new file mode 100644 index 00000000000..00bf105560b --- /dev/null +++ b/changelogs/unreleased/26881-backup-fails-if-data-changes.yml @@ -0,0 +1,4 @@ +--- +title: Add `copy` backup strategy to combat file changed errors +merge_request: 8728 +author: diff --git a/doc/raketasks/backup_restore.md b/doc/raketasks/backup_restore.md index b4e13f5812a..a5b8cd6455c 100644 --- a/doc/raketasks/backup_restore.md +++ b/doc/raketasks/backup_restore.md @@ -84,6 +84,28 @@ Deleting tmp directories...[DONE] Deleting old backups... [SKIPPING] ``` +## Backup Strategy Option + +> **Note:** Introduced as an option in 8.17 + +The default backup strategy is to essentially stream data from the respective +data locations to the backup using the Linux commands `tar` and `gzip`. This works +fine in most cases, but can cause problems when data is rapidly changing. + +When data changes while `tar` is reading it, the error `file changed as we read +it` may occur, and will cause the backup process to fail. To combat this, 8.17 +introduces a new backup strategy called `copy`. The strategy copies data files +to a temporary location before calling `tar` and `gzip`, avoiding the error. + +A side-effect is that the backup process will take up to an additional 1X disk +space. The process does its best to clean up the temporary files at each stage +so the problem doesn't compound, but it could be a considerable change for large +installations. This is why the `copy` strategy is not the default in 8.17. + +To use the `copy` strategy instead of the default streaming strategy, specify +`STRATEGY=copy` in the Rake task command. For example, +`sudo gitlab-rake gitlab:backup:create STRATEGY=copy`. + ## Exclude specific directories from the backup You can choose what should be backed up by adding the environment variable `SKIP`. 
# Writes this component's files to a gzip-compressed tarball at
# `backup_tarball`.
#
# The backup directory (Gitlab.config.backup.path) is created when missing
# and any existing tarball for this component is deleted first.
#
# Strategies, selected through the STRATEGY environment variable:
# * default - tar reads directly from `app_files_dir`.
# * 'copy'  - `app_files_dir` is first copied with `cp -a` into the backup
#   directory and tar reads from that copy (@backup_files_dir) instead of
#   the live data. The copy is deleted afterwards, including when the
#   archive step fails, so a failed run does not leave a full extra copy of
#   the data on disk.
#
# Calls `abort` with 'Backup failed' when the copy command exits non-zero.
# Failure handling of the tar/gzip step is left to `run_pipeline!`.
def dump
  FileUtils.mkdir_p(Gitlab.config.backup.path)
  FileUtils.rm_f(backup_tarball)

  source_dir = app_files_dir
  staged = false

  if ENV['STRATEGY'] == 'copy'
    copy_cmd = %W(cp -a #{app_files_dir} #{Gitlab.config.backup.path})
    output, status = Gitlab::Popen.popen(copy_cmd)

    unless status.zero?
      puts output
      abort 'Backup failed'
    end

    source_dir = @backup_files_dir
    # Set only after a successful copy: a failed `cp` must never trigger the
    # removal below (e.g. when cp refuses because source and target are the
    # same directory).
    staged = true
  end

  run_pipeline!([%W(tar -C #{source_dir} -cf - .), %W(gzip -c -1)], out: [backup_tarball, 'w', 0600])
ensure
  # Runs on success, on exceptions and on `abort` (SystemExit) alike.
  FileUtils.rm_rf(@backup_files_dir) if staged
end