From 292d117078e52d215e9fc1e5698d58c3cda3db75 Mon Sep 17 00:00:00 2001
From: James Fargher <proglottis@gmail.com>
Date: Mon, 9 May 2022 06:01:54 +0000
Subject: [PATCH] Add PREVIOUS_BACKUP option to backup.rake

PREVIOUS_BACKUP allows choosing a backup to unpack separately from
BACKUP which chooses which backup to create. This means you can create
an incremental backup based off of a previous backup without also
overwriting it.

Changelog: added
---
 doc/raketasks/backup_restore.md |  10 ++-
 lib/backup/manager.rb           |  47 ++++++++---
 spec/lib/backup/manager_spec.rb | 145 ++++++++++++++++++++++++++++++--
 3 files changed, 177 insertions(+), 25 deletions(-)

diff --git a/doc/raketasks/backup_restore.md b/doc/raketasks/backup_restore.md
index 5458e9952fa53..944936bfa07ea 100644
--- a/doc/raketasks/backup_restore.md
+++ b/doc/raketasks/backup_restore.md
@@ -379,18 +379,24 @@ sudo -u git -H bundle exec rake gitlab:backup:create GITLAB_BACKUP_MAX_CONCURREN
 
 > - Introduced in GitLab 14.9 [with a flag](../administration/feature_flags.md) named `incremental_repository_backup`. Disabled by default.
 > - [Enabled on self-managed](https://gitlab.com/gitlab-org/gitlab/-/issues/355945) in GitLab 14.10.
+> - `PREVIOUS_BACKUP` option [introduced](https://gitlab.com/gitlab-org/gitaly/-/issues/4184) in GitLab 15.0.
 
 FLAG:
 On self-managed GitLab, by default this feature is available. To hide the feature, ask an administrator to [disable the feature flag](../administration/feature_flags.md) named `incremental_repository_backup`.
 On GitLab.com, this feature is not available.
 
 Incremental backups can be faster than full backups because they only pack changes since the last backup into the backup
-bundle for each repository. There must be an existing backup to create an incremental backup from and this backup will be overwritten. You can use the `BACKUP=timestamp_of_backup` option to choose which backup will be used.
+bundle for each repository. There must be an existing backup to create an incremental backup from:
+
+- In GitLab 14.9 and 14.10, use the `BACKUP=<timestamp_of_backup>` option to choose the backup to use. The chosen previous backup is overwritten.
+- In GitLab 15.0 and later, use the `PREVIOUS_BACKUP=<timestamp_of_backup>` option to choose the backup to use. By default, a backup file is created
+  as documented in the [Backup timestamp](#backup-timestamp) section. You can override the `[TIMESTAMP]` portion of the filename by setting the 
+  [`BACKUP` environment variable](#backup-filename).
 
 To create an incremental backup, run:
 
 ```shell
-sudo gitlab-backup create INCREMENTAL=yes
+sudo gitlab-backup create INCREMENTAL=yes PREVIOUS_BACKUP=<timestamp_of_backup>
 ```
 
 Incremental backups can also be created from [an untarred backup](#skipping-tar-creation) by using `SKIP=tar`:
diff --git a/lib/backup/manager.rb b/lib/backup/manager.rb
index f38bc625639d5..baff68d503fae 100644
--- a/lib/backup/manager.rb
+++ b/lib/backup/manager.rb
@@ -37,9 +37,10 @@ def initialize(progress, definitions: nil)
 
     def create
       if incremental?
-        unpack
+        unpack(ENV.fetch('PREVIOUS_BACKUP', ENV['BACKUP']))
         read_backup_information
         verify_backup_version
+        update_backup_information
       end
 
       @definitions.keys.each do |task_name|
@@ -79,7 +80,7 @@ def run_create_task(task_name)
       end
 
       puts_time "Dumping #{definition.human_name} ... ".color(:blue)
-      definition.task.dump(File.join(Gitlab.config.backup.path, definition.destination_path), backup_id)
+      definition.task.dump(File.join(Gitlab.config.backup.path, definition.destination_path), full_backup_id)
       puts_time "Dumping #{definition.human_name} ... ".color(:blue) + "done".color(:green)
 
     rescue Backup::DatabaseBackupError, Backup::FileBackupError => e
@@ -87,7 +88,7 @@ def run_create_task(task_name)
     end
 
     def restore
-      cleanup_required = unpack
+      cleanup_required = unpack(ENV['BACKUP'])
       read_backup_information
       verify_backup_version
 
@@ -248,6 +249,17 @@ def build_backup_information
       }
     end
 
+    def update_backup_information
+      @backup_information.merge!(
+        full_backup_id: full_backup_id,
+        db_version: ActiveRecord::Migrator.current_version.to_s,
+        backup_created_at: Time.zone.now,
+        gitlab_version: Gitlab::VERSION,
+        tar_version: tar_version,
+        installation_type: Gitlab::INSTALLATION_TYPE
+      )
+    end
+
     def backup_information
       raise Backup::Error, "#{MANIFEST_NAME} not yet loaded" unless @backup_information
 
@@ -374,8 +386,8 @@ def verify_backup_version
       end
     end
 
-    def unpack
-      if ENV['BACKUP'].blank? && non_tarred_backup?
+    def unpack(source_backup_id)
+      if source_backup_id.blank? && non_tarred_backup?
         puts_time "Non tarred backup found in #{backup_path}, using that"
 
         return false
@@ -387,14 +399,14 @@ def unpack
           puts_time "No backups found in #{backup_path}"
           puts_time "Please make sure that file name ends with #{FILE_NAME_SUFFIX}"
           exit 1
-        elsif backup_file_list.many? && ENV["BACKUP"].nil?
+        elsif backup_file_list.many? && source_backup_id.nil?
           puts_time 'Found more than one backup:'
           # print list of available backups
           puts_time " " + available_timestamps.join("\n ")
 
           if incremental?
             puts_time 'Please specify which one you want to create an incremental backup for:'
-            puts_time 'rake gitlab:backup:create INCREMENTAL=true BACKUP=timestamp_of_backup'
+            puts_time 'rake gitlab:backup:create INCREMENTAL=true PREVIOUS_BACKUP=timestamp_of_backup'
           else
             puts_time 'Please specify which one you want to restore:'
             puts_time 'rake gitlab:backup:restore BACKUP=timestamp_of_backup'
@@ -403,8 +415,8 @@ def unpack
           exit 1
         end
 
-        tar_file = if ENV['BACKUP'].present?
-                     File.basename(ENV['BACKUP']) + FILE_NAME_SUFFIX
+        tar_file = if source_backup_id.present?
+                     File.basename(source_backup_id) + FILE_NAME_SUFFIX
                    else
                      backup_file_list.first
                    end
@@ -501,12 +513,19 @@ def tar_file
       @tar_file ||= "#{backup_id}#{FILE_NAME_SUFFIX}"
     end
 
+    def full_backup_id
+      full_backup_id = backup_information[:full_backup_id]
+      full_backup_id ||= File.basename(ENV['PREVIOUS_BACKUP']) if ENV['PREVIOUS_BACKUP'].present?
+      full_backup_id ||= backup_id
+      full_backup_id
+    end
+
     def backup_id
-      @backup_id ||= if ENV['BACKUP'].present?
-                       File.basename(ENV['BACKUP'])
-                     else
-                       "#{backup_information[:backup_created_at].strftime('%s_%Y_%m_%d_')}#{backup_information[:gitlab_version]}"
-                     end
+      if ENV['BACKUP'].present?
+        File.basename(ENV['BACKUP'])
+      else
+        "#{backup_information[:backup_created_at].strftime('%s_%Y_%m_%d_')}#{backup_information[:gitlab_version]}"
+      end
     end
 
     def create_attributes
diff --git a/spec/lib/backup/manager_spec.rb b/spec/lib/backup/manager_spec.rb
index a5fd26908610f..81573b6140d25 100644
--- a/spec/lib/backup/manager_spec.rb
+++ b/spec/lib/backup/manager_spec.rb
@@ -147,6 +147,7 @@
     let(:expected_backup_contents) { %w{backup_information.yml task1.tar.gz task2.tar.gz} }
     let(:backup_time) { Time.utc(2019, 1, 1) }
     let(:backup_id) { "1546300800_2019_01_01_#{Gitlab::VERSION}" }
+    let(:full_backup_id) { backup_id }
     let(:pack_tar_file) { "#{backup_id}_gitlab_backup.tar" }
     let(:pack_tar_system_options) { { out: [pack_tar_file, 'w', Gitlab.config.backup.archive_permissions] } }
     let(:pack_tar_cmdline) { ['tar', '-cf', '-', *expected_backup_contents, pack_tar_system_options] }
@@ -166,8 +167,8 @@
       allow(Gitlab::BackupLogger).to receive(:info)
       allow(Kernel).to receive(:system).and_return(true)
 
-      allow(task1).to receive(:dump).with(File.join(Gitlab.config.backup.path, 'task1.tar.gz'), backup_id)
-      allow(task2).to receive(:dump).with(File.join(Gitlab.config.backup.path, 'task2.tar.gz'), backup_id)
+      allow(task1).to receive(:dump).with(File.join(Gitlab.config.backup.path, 'task1.tar.gz'), full_backup_id)
+      allow(task2).to receive(:dump).with(File.join(Gitlab.config.backup.path, 'task2.tar.gz'), full_backup_id)
     end
 
     it 'executes tar' do
@@ -588,6 +589,14 @@
         end
 
         expect(Kernel).not_to have_received(:system).with(*pack_tar_cmdline)
+        expect(YAML.load_file(File.join(Gitlab.config.backup.path, 'backup_information.yml'))).to include(
+          backup_created_at: backup_time.localtime,
+          db_version: be_a(String),
+          gitlab_version: Gitlab::VERSION,
+          installation_type: Gitlab::INSTALLATION_TYPE,
+          skipped: 'tar',
+          tar_version: be_a(String)
+        )
       end
     end
 
@@ -595,11 +604,11 @@
       let(:incremental_env) { 'true' }
       let(:gitlab_version) { Gitlab::VERSION }
       let(:backup_id) { "1546300800_2019_01_01_#{gitlab_version}" }
-      let(:pack_tar_file) { "#{backup_id}_gitlab_backup.tar" }
-      let(:unpack_tar_cmdline) { ['tar', '-xf', pack_tar_file] }
+      let(:unpack_tar_file) { "#{full_backup_id}_gitlab_backup.tar" }
+      let(:unpack_tar_cmdline) { ['tar', '-xf', unpack_tar_file] }
       let(:backup_information) do
         {
-          backup_created_at: backup_time,
+          backup_created_at: Time.zone.parse('2018-01-01'),
           gitlab_version: gitlab_version
         }
       end
@@ -682,7 +691,9 @@
         end
 
         it 'unpacks and packs the backup' do
-          subject.create # rubocop:disable Rails/SaveBang
+          travel_to(backup_time) do
+            subject.create # rubocop:disable Rails/SaveBang
+          end
 
           expect(Kernel).to have_received(:system).with(*unpack_tar_cmdline)
           expect(Kernel).to have_received(:system).with(*pack_tar_cmdline)
@@ -732,21 +743,137 @@
         end
       end
 
+      context 'when PREVIOUS_BACKUP variable is set to a non-existing file' do
+        before do
+          allow(Dir).to receive(:glob).and_return(
+            [
+              '1451606400_2016_01_01_gitlab_backup.tar'
+            ]
+          )
+          allow(File).to receive(:exist?).and_return(false)
+
+          stub_env('PREVIOUS_BACKUP', 'wrong')
+        end
+
+        it 'fails the operation and prints an error' do
+          expect { subject.create }.to raise_error SystemExit # rubocop:disable Rails/SaveBang
+          expect(File).to have_received(:exist?).with('wrong_gitlab_backup.tar')
+          expect(progress).to have_received(:puts)
+            .with(a_string_matching('The backup file wrong_gitlab_backup.tar does not exist'))
+        end
+      end
+
+      context 'when PREVIOUS_BACKUP variable is set to a correct file' do
+        let(:full_backup_id) { 'some_previous_backup' }
+
+        before do
+          allow(Gitlab::BackupLogger).to receive(:info)
+          allow(Dir).to receive(:glob).and_return(
+            [
+              'some_previous_backup_gitlab_backup.tar'
+            ]
+          )
+          allow(File).to receive(:exist?).with('some_previous_backup_gitlab_backup.tar').and_return(true)
+          allow(Kernel).to receive(:system).and_return(true)
+
+          stub_env('PREVIOUS_BACKUP', '/ignored/path/some_previous_backup')
+        end
+
+        it 'unpacks and packs the backup' do
+          travel_to(backup_time) do
+            subject.create # rubocop:disable Rails/SaveBang
+          end
+
+          expect(Kernel).to have_received(:system).with(*unpack_tar_cmdline)
+          expect(Kernel).to have_received(:system).with(*pack_tar_cmdline)
+        end
+
+        context 'untar fails' do
+          before do
+            expect(Kernel).to receive(:system).with(*unpack_tar_cmdline).and_return(false)
+          end
+
+          it 'logs a failure' do
+            expect do
+              travel_to(backup_time) do
+                subject.create # rubocop:disable Rails/SaveBang
+              end
+            end.to raise_error(SystemExit)
+
+            expect(Gitlab::BackupLogger).to have_received(:info).with(message: 'Unpacking backup failed')
+          end
+        end
+
+        context 'tar fails' do
+          before do
+            expect(Kernel).to receive(:system).with(*pack_tar_cmdline).and_return(false)
+          end
+
+          it 'logs a failure' do
+            expect do
+              travel_to(backup_time) do
+                subject.create # rubocop:disable Rails/SaveBang
+              end
+            end.to raise_error(Backup::Error, 'Backup failed')
+
+            expect(Gitlab::BackupLogger).to have_received(:info).with(message: "Creating archive #{pack_tar_file} failed")
+          end
+        end
+
+        context 'on version mismatch' do
+          let(:backup_information) do
+            {
+              backup_created_at: Time.zone.parse('2018-01-01'),
+              gitlab_version: "not #{gitlab_version}"
+            }
+          end
+
+          it 'stops the process' do
+            expect { subject.create }.to raise_error SystemExit # rubocop:disable Rails/SaveBang
+            expect(progress).to have_received(:puts)
+              .with(a_string_matching('GitLab version mismatch'))
+          end
+        end
+      end
+
       context 'when there is a non-tarred backup in the directory' do
+        let(:full_backup_id) { "1514764800_2018_01_01_#{Gitlab::VERSION}" }
+        let(:backup_information) do
+          {
+            backup_created_at: Time.zone.parse('2018-01-01'),
+            gitlab_version: gitlab_version,
+            skipped: 'tar'
+          }
+        end
+
         before do
           allow(Dir).to receive(:glob).and_return(
             [
               'backup_information.yml'
             ]
           )
-          allow(File).to receive(:exist?).and_return(true)
+          allow(File).to receive(:exist?).with(File.join(Gitlab.config.backup.path, 'backup_information.yml')).and_return(true)
         end
 
-        it 'selects the non-tarred backup to restore from' do
-          subject.create # rubocop:disable Rails/SaveBang
+        after do
+          FileUtils.rm(File.join(Gitlab.config.backup.path, 'backup_information.yml'), force: true)
+        end
+
+        it 'updates the non-tarred backup' do
+          travel_to(backup_time) do
+            subject.create # rubocop:disable Rails/SaveBang
+          end
 
           expect(progress).to have_received(:puts)
             .with(a_string_matching('Non tarred backup found '))
+          expect(progress).to have_received(:puts)
+            .with(a_string_matching("Backup #{backup_id} is done"))
+          expect(YAML.load_file(File.join(Gitlab.config.backup.path, 'backup_information.yml'))).to include(
+            backup_created_at: backup_time,
+            full_backup_id: full_backup_id,
+            gitlab_version: Gitlab::VERSION,
+            skipped: 'tar'
+          )
         end
 
         context 'on version mismatch' do
-- 
GitLab