From 0388a8255a31a03b9403e49519d16ec9a6681938 Mon Sep 17 00:00:00 2001 From: Michael Kozono <mkozono@gitlab.com> Date: Wed, 17 Jan 2024 21:47:49 +0000 Subject: [PATCH] Enable geo_skip_download_if_exists by default Changelog: added EE: true --- .../disaster_recovery/bring_primary_back.md | 43 ++++++++++++ .../concerns/geo/verifiable_replicator.rb | 2 - .../geo_skip_download_if_exists.yml | 8 --- .../verifiable_replicator_shared_examples.rb | 66 ++++++++----------- 4 files changed, 71 insertions(+), 48 deletions(-) delete mode 100644 ee/config/feature_flags/development/geo_skip_download_if_exists.yml diff --git a/doc/administration/geo/disaster_recovery/bring_primary_back.md b/doc/administration/geo/disaster_recovery/bring_primary_back.md index be4f4d3f6a470..7e4a5cc01af75 100644 --- a/doc/administration/geo/disaster_recovery/bring_primary_back.md +++ b/doc/administration/geo/disaster_recovery/bring_primary_back.md @@ -75,3 +75,46 @@ If your objective is to have two sites again, you need to bring your **secondary site back online as well by repeating the first step ([configure the former **primary** site to be a **secondary** site](#configure-the-former-primary-site-to-be-a-secondary-site)) for the **secondary** site. + +## Skipping re-transfer of data on a **secondary** site + +When a secondary site is added, if it contains data that would otherwise be synced from the primary, then Geo avoids re-transferring the data. + +- Git repositories are transferred by `git fetch`, which only transfers missing refs. +- Geo's container registry sync code compares tags and only pulls missing tags. +- [Blobs/files](#skipping-re-transfer-of-blobs-or-files) are skipped if they exist on the first sync. + +This behavior is useful when: + +- You do a planned failover and demote the old primary site by attaching it as a secondary site without rebuilding it. +- You do a failover test by promoting and demoting a secondary site and reattach it without rebuilding it. +- You restore a backup and attach the site as a secondary site. +- You manually copy data to a secondary site to workaround a sync problem. +- You delete or truncate registry table rows in the Geo tracking database to workaround a problem. +- You reset the Geo tracking database to workaround a problem. + +### Skipping re-transfer of blobs or files + +> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/352530) in GitLab 16.8 [with a flag](../../feature_flags.md) named `geo_skip_download_if_exists`. Disabled by default. + +FLAG: +On self-managed GitLab, by default this feature is not available. To make it available, an administrator can [enable the feature flag](../../feature_flags.md) named `geo_skip_download_if_exists`. +On GitLab.com, this feature is not available. + +When you add a secondary site which has preexisting file data, then the secondary Geo site will avoid re-transferring that data. This applies to: + +- CI job artifacts +- CI pipeline artifacts +- CI secure files +- LFS objects +- Merge request diffs +- Package files +- Pages deployments +- Terraform state versions +- Uploads +- Dependency proxy manifests +- Dependency proxy blobs + +If the secondary site's copy is actually corrupted, then background verification will eventually fail, and the file will be resynced. + +Files will only be skipped in this manner if they do not have a corresponding registry record in the Geo tracking database. The conditions are strict because resyncing is almost always intentional, and we cannot risk mistakenly skipping a transfer. diff --git a/ee/app/models/concerns/geo/verifiable_replicator.rb b/ee/app/models/concerns/geo/verifiable_replicator.rb index df7e90ad2ee25..86a5e1fae18e3 100644 --- a/ee/app/models/concerns/geo/verifiable_replicator.rb +++ b/ee/app/models/concerns/geo/verifiable_replicator.rb @@ -302,8 +302,6 @@ def primary_verification_succeeded? # # @return [Boolean] whether it is a good idea to mark a replicable synced without downloading it def ok_to_skip_download? - return false unless Feature.enabled?(:geo_skip_download_if_exists) - # Verification of immutable data is likely to pass if the data seems to # already exist. We are much less sure about mutable data, so let's # exclude it from this optimization for now. diff --git a/ee/config/feature_flags/development/geo_skip_download_if_exists.yml b/ee/config/feature_flags/development/geo_skip_download_if_exists.yml deleted file mode 100644 index 85ec36fb7a877..0000000000000 --- a/ee/config/feature_flags/development/geo_skip_download_if_exists.yml +++ /dev/null @@ -1,8 +0,0 @@ ---- -name: geo_skip_download_if_exists -introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/96935 -rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/435788 -milestone: '16.8' -type: development -group: group::geo -default_enabled: false diff --git a/ee/spec/support/shared_examples/models/concerns/verifiable_replicator_shared_examples.rb b/ee/spec/support/shared_examples/models/concerns/verifiable_replicator_shared_examples.rb index 6a300bea8f3e4..3f2bd16dabf80 100644 --- a/ee/spec/support/shared_examples/models/concerns/verifiable_replicator_shared_examples.rb +++ b/ee/spec/support/shared_examples/models/concerns/verifiable_replicator_shared_examples.rb @@ -739,84 +739,74 @@ describe '#ok_to_skip_download?' do subject(:ok_to_skip_download?) { replicator.ok_to_skip_download? } - context 'when geo_skip_download_if_exists is enabled' do - context 'when the registry is brand new' do - context 'when the model is immutable' do + context 'when the registry is brand new' do + context 'when the model is immutable' do + before do + skip 'this context does not apply to mutable models' unless replicator.immutable? + end + + context 'when the resource already exists on this site' do before do - skip 'this context does not apply to mutable models' unless replicator.immutable? + allow(replicator).to receive(:resource_exists?).and_return(true) end - context 'when the resource already exists on this site' do + context 'when verification is enabled for this model' do before do - allow(replicator).to receive(:resource_exists?).and_return(true) + unless replicator.class.verification_enabled? + skip 'this context does not apply to models that are not verified' + end end - context 'when verification is enabled for this model' do + context 'when the resource is in verifiables' do before do - unless replicator.class.verification_enabled? - skip 'this context does not apply to models that are not verified' - end - end - - context 'when the resource is in verifiables' do - before do - allow(model_record).to receive(:in_verifiables?).and_return(true) - end - - it { is_expected.to be_truthy } + allow(model_record).to receive(:in_verifiables?).and_return(true) end - context 'when the resource is not in verifiables' do - before do - allow(model_record).to receive(:in_verifiables?).and_return(false) - end - - it { is_expected.to be_falsey } - end + it { is_expected.to be_truthy } end - context 'when verification is disabled for this model' do + context 'when the resource is not in verifiables' do before do - skip 'this context does not apply to models that are verified' if replicator.class.verification_enabled? + allow(model_record).to receive(:in_verifiables?).and_return(false) end it { is_expected.to be_falsey } end end - context 'when the resource does not exist on this site' do + context 'when verification is disabled for this model' do before do - allow(replicator).to receive(:resource_exists?).and_return(false) + skip 'this context does not apply to models that are verified' if replicator.class.verification_enabled? end it { is_expected.to be_falsey } end end - context 'when the model is mutable' do + context 'when the resource does not exist on this site' do before do - skip 'this context does not apply to immutable models' if replicator.immutable? + allow(replicator).to receive(:resource_exists?).and_return(false) end it { is_expected.to be_falsey } end end - context 'when the registry is not brand new (sync or verification has been attempted before)' do + context 'when the model is mutable' do before do - model_record.save! - replicator.registry.start - replicator.registry.synced! - replicator.registry.pending! + skip 'this context does not apply to immutable models' if replicator.immutable? end it { is_expected.to be_falsey } end end - context 'when geo_skip_download_if_exists is disabled' do + context 'when the registry is not brand new (sync or verification has been attempted before)' do before do - stub_feature_flags(geo_skip_download_if_exists: false) + model_record.save! + replicator.registry.start + replicator.registry.synced! + replicator.registry.pending! end it { is_expected.to be_falsey } -- GitLab