From 0388a8255a31a03b9403e49519d16ec9a6681938 Mon Sep 17 00:00:00 2001
From: Michael Kozono <mkozono@gitlab.com>
Date: Wed, 17 Jan 2024 21:47:49 +0000
Subject: [PATCH] Enable geo_skip_download_if_exists by default

Changelog: added
EE: true
---
 .../disaster_recovery/bring_primary_back.md   | 43 ++++++++++++
 .../concerns/geo/verifiable_replicator.rb     |  2 -
 .../geo_skip_download_if_exists.yml           |  8 ---
 .../verifiable_replicator_shared_examples.rb  | 66 ++++++++-----------
 4 files changed, 71 insertions(+), 48 deletions(-)
 delete mode 100644 ee/config/feature_flags/development/geo_skip_download_if_exists.yml

diff --git a/doc/administration/geo/disaster_recovery/bring_primary_back.md b/doc/administration/geo/disaster_recovery/bring_primary_back.md
index be4f4d3f6a470..7e4a5cc01af75 100644
--- a/doc/administration/geo/disaster_recovery/bring_primary_back.md
+++ b/doc/administration/geo/disaster_recovery/bring_primary_back.md
@@ -75,3 +75,46 @@ If your objective is to have two sites again, you need to bring your **secondary
 site back online as well by repeating the first step
 ([configure the former **primary** site to be a **secondary** site](#configure-the-former-primary-site-to-be-a-secondary-site))
 for the **secondary** site.
+
+## Skipping re-transfer of data on a **secondary** site
+
+When a secondary site is added, if it contains data that would otherwise be synced from the primary, then Geo avoids re-transferring the data.
+
+- Git repositories are transferred by `git fetch`, which only transfers missing refs.
+- Geo's container registry sync code compares tags and only pulls missing tags.
+- [Blobs/files](#skipping-re-transfer-of-blobs-or-files) are skipped if they exist on the first sync.
+
+This behavior is useful when:
+
+- You do a planned failover and demote the old primary site by attaching it as a secondary site without rebuilding it.
+- You do a failover test by promoting and demoting a secondary site and reattach it without rebuilding it.
+- You restore a backup and attach the site as a secondary site.
+- You manually copy data to a secondary site to workaround a sync problem.
+- You delete or truncate registry table rows in the Geo tracking database to workaround a problem.
+- You reset the Geo tracking database to workaround a problem.
+
+### Skipping re-transfer of blobs or files
+
+> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/352530) in GitLab 16.8 [with a flag](../../feature_flags.md) named `geo_skip_download_if_exists`. Disabled by default.
+
+FLAG:
+On self-managed GitLab, by default this feature is not available. To make it available, an administrator can [enable the feature flag](../../feature_flags.md) named `geo_skip_download_if_exists`.
+On GitLab.com, this feature is not available.
+
+When you add a secondary site which has preexisting file data, then the secondary Geo site will avoid re-transferring that data. This applies to:
+
+- CI job artifacts
+- CI pipeline artifacts
+- CI secure files
+- LFS objects
+- Merge request diffs
+- Package files
+- Pages deployments
+- Terraform state versions
+- Uploads
+- Dependency proxy manifests
+- Dependency proxy blobs
+
+If the secondary site's copy is actually corrupted, then background verification will eventually fail, and the file will be resynced.
+
+Files will only be skipped in this manner if they do not have a corresponding registry record in the Geo tracking database. The conditions are strict because resyncing is almost always intentional, and we cannot risk mistakenly skipping a transfer.
diff --git a/ee/app/models/concerns/geo/verifiable_replicator.rb b/ee/app/models/concerns/geo/verifiable_replicator.rb
index df7e90ad2ee25..86a5e1fae18e3 100644
--- a/ee/app/models/concerns/geo/verifiable_replicator.rb
+++ b/ee/app/models/concerns/geo/verifiable_replicator.rb
@@ -302,8 +302,6 @@ def primary_verification_succeeded?
     #
     # @return [Boolean] whether it is a good idea to mark a replicable synced without downloading it
     def ok_to_skip_download?
-      return false unless Feature.enabled?(:geo_skip_download_if_exists)
-
       # Verification of immutable data is likely to pass if the data seems to
       # already exist. We are much less sure about mutable data, so let's
       # exclude it from this optimization for now.
diff --git a/ee/config/feature_flags/development/geo_skip_download_if_exists.yml b/ee/config/feature_flags/development/geo_skip_download_if_exists.yml
deleted file mode 100644
index 85ec36fb7a877..0000000000000
--- a/ee/config/feature_flags/development/geo_skip_download_if_exists.yml
+++ /dev/null
@@ -1,8 +0,0 @@
----
-name: geo_skip_download_if_exists
-introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/96935
-rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/435788
-milestone: '16.8'
-type: development
-group: group::geo
-default_enabled: false
diff --git a/ee/spec/support/shared_examples/models/concerns/verifiable_replicator_shared_examples.rb b/ee/spec/support/shared_examples/models/concerns/verifiable_replicator_shared_examples.rb
index 6a300bea8f3e4..3f2bd16dabf80 100644
--- a/ee/spec/support/shared_examples/models/concerns/verifiable_replicator_shared_examples.rb
+++ b/ee/spec/support/shared_examples/models/concerns/verifiable_replicator_shared_examples.rb
@@ -739,84 +739,74 @@
   describe '#ok_to_skip_download?' do
     subject(:ok_to_skip_download?) { replicator.ok_to_skip_download? }
 
-    context 'when geo_skip_download_if_exists is enabled' do
-      context 'when the registry is brand new' do
-        context 'when the model is immutable' do
+    context 'when the registry is brand new' do
+      context 'when the model is immutable' do
+        before do
+          skip 'this context does not apply to mutable models' unless replicator.immutable?
+        end
+
+        context 'when the resource already exists on this site' do
           before do
-            skip 'this context does not apply to mutable models' unless replicator.immutable?
+            allow(replicator).to receive(:resource_exists?).and_return(true)
           end
 
-          context 'when the resource already exists on this site' do
+          context 'when verification is enabled for this model' do
             before do
-              allow(replicator).to receive(:resource_exists?).and_return(true)
+              unless replicator.class.verification_enabled?
+                skip 'this context does not apply to models that are not verified'
+              end
             end
 
-            context 'when verification is enabled for this model' do
+            context 'when the resource is in verifiables' do
               before do
-                unless replicator.class.verification_enabled?
-                  skip 'this context does not apply to models that are not verified'
-                end
-              end
-
-              context 'when the resource is in verifiables' do
-                before do
-                  allow(model_record).to receive(:in_verifiables?).and_return(true)
-                end
-
-                it { is_expected.to be_truthy }
+                allow(model_record).to receive(:in_verifiables?).and_return(true)
               end
 
-              context 'when the resource is not in verifiables' do
-                before do
-                  allow(model_record).to receive(:in_verifiables?).and_return(false)
-                end
-
-                it { is_expected.to be_falsey }
-              end
+              it { is_expected.to be_truthy }
             end
 
-            context 'when verification is disabled for this model' do
+            context 'when the resource is not in verifiables' do
               before do
-                skip 'this context does not apply to models that are verified' if replicator.class.verification_enabled?
+                allow(model_record).to receive(:in_verifiables?).and_return(false)
               end
 
               it { is_expected.to be_falsey }
             end
           end
 
-          context 'when the resource does not exist on this site' do
+          context 'when verification is disabled for this model' do
             before do
-              allow(replicator).to receive(:resource_exists?).and_return(false)
+              skip 'this context does not apply to models that are verified' if replicator.class.verification_enabled?
             end
 
             it { is_expected.to be_falsey }
           end
         end
 
-        context 'when the model is mutable' do
+        context 'when the resource does not exist on this site' do
           before do
-            skip 'this context does not apply to immutable models' if replicator.immutable?
+            allow(replicator).to receive(:resource_exists?).and_return(false)
           end
 
           it { is_expected.to be_falsey }
         end
       end
 
-      context 'when the registry is not brand new (sync or verification has been attempted before)' do
+      context 'when the model is mutable' do
         before do
-          model_record.save!
-          replicator.registry.start
-          replicator.registry.synced!
-          replicator.registry.pending!
+          skip 'this context does not apply to immutable models' if replicator.immutable?
         end
 
         it { is_expected.to be_falsey }
       end
     end
 
-    context 'when geo_skip_download_if_exists is disabled' do
+    context 'when the registry is not brand new (sync or verification has been attempted before)' do
       before do
-        stub_feature_flags(geo_skip_download_if_exists: false)
+        model_record.save!
+        replicator.registry.start
+        replicator.registry.synced!
+        replicator.registry.pending!
       end
 
       it { is_expected.to be_falsey }
-- 
GitLab