diff --git a/doc/administration/geo/secondary_proxy/index.md b/doc/administration/geo/secondary_proxy/index.md index 2b8c0d1e6fab8b2d8a8ae098936695f6c5be3de4..b0d0861066a968bc95e56ee375360f969a6da46f 100644 --- a/doc/administration/geo/secondary_proxy/index.md +++ b/doc/administration/geo/secondary_proxy/index.md @@ -7,11 +7,14 @@ type: howto # Geo proxying for secondary sites **(PREMIUM SELF)** -> [Introduced](https://gitlab.com/groups/gitlab-org/-/epics/5914) in GitLab 14.4 [with a flag](../../feature_flags.md) named `geo_secondary_proxy`. Disabled by default. +> - [Introduced](https://gitlab.com/groups/gitlab-org/-/epics/5914) in GitLab 14.4 [with a flag](../../feature_flags.md) named `geo_secondary_proxy`. Disabled by default. +> - [Enabled by default for unified URLs](https://gitlab.com/gitlab-org/gitlab/-/issues/325732) in GitLab 14.5. +> - [Disabled by default for different URLs](https://gitlab.com/gitlab-org/gitlab/-/issues/325732) in GitLab 14.5 [with a flag](../../feature_flags.md) named `geo_secondary_proxy_separate_urls`. FLAG: -On self-managed GitLab, by default this feature is not available. See below to [Set up a unified URL for Geo sites](#set-up-a-unified-url-for-geo-sites). -The feature is not ready for production use. +On self-managed GitLab, this feature is only available by default for Geo sites using a unified URL. See below to +[set up a unified URL for Geo sites](#set-up-a-unified-url-for-geo-sites). +The feature is not ready for production use with separate URLs. Use Geo proxying to: @@ -65,8 +68,10 @@ a single URL used by all Geo sites, including the primary. In the Geo administration page of the **primary** site, edit each Geo secondary that is using the secondary proxying and set the `URL` field to the single URL. Make sure the primary site is also using this URL. + +## Disable Geo proxying -### Enable secondary proxying +You can disable the secondary proxying on each Geo site, separately, by following these steps: 1. SSH into each application node (serving user traffic directly) on your secondary Geo site and add the following environment variable: @@ -77,7 +82,7 @@ a single URL used by all Geo sites, including the primary. ```ruby gitlab_workhorse['env'] = { - "GEO_SECONDARY_PROXY" => "1" + "GEO_SECONDARY_PROXY" => "0" } ``` @@ -87,18 +92,34 @@ a single URL used by all Geo sites, including the primary. gitlab-ctl reconfigure ``` -1. SSH into one node running Rails on your primary Geo site and enable the Geo secondary proxy feature flag: - - ```shell - sudo gitlab-rails runner "Feature.enable(:geo_secondary_proxy)" - ``` - ## Enable Geo proxying with Separate URLs The ability to use proxying with separate URLs is still in development. You can follow the ["Geo secondary proxying with separate URLs" epic](https://gitlab.com/groups/gitlab-org/-/epics/6865) for progress. +To try out this feature, enable the `geo_secondary_proxy_separate_urls` feature flag. +SSH into one node running Rails on your primary Geo site and run: + +```shell +sudo gitlab-rails runner "Feature.enable(:geo_secondary_proxy_separate_urls)" +``` + +## Limitations + +The asynchronous Geo replication can cause unexpected issues when secondary proxying is used, for accelerated +data types that may be replicated to the Geo secondaries with a delay. + +For example, we found a potential issue where +[Replication lag introduces read-your-own-write inconsistencies](https://gitlab.com/gitlab-org/gitlab/-/issues/345267). +If the replication lag is high enough, this can result in Git reads receiving stale data when hitting a secondary. + +Non-Rails requests are not proxied, so other services may need to use a separate, non-unified URL to ensure requests +are always sent to the primary. These services include: + +- GitLab Container Registry - [can be configured to use a separate domain](../../packages/container_registry.md#configure-container-registry-under-its-own-domain). +- GitLab Pages - should always use a separate domain, as part of [the prerequisites for running GitLab Pages](../../pages/index.md#prerequisites). + ## Features accelerated by secondary Geo sites Most HTTP traffic sent to a secondary Geo site can be proxied to the primary Geo site. With this architecture, diff --git a/doc/administration/geo/setup/index.md b/doc/administration/geo/setup/index.md index 84dff69ebe71316bfd13cca2d9a7fa77e6430bec..7d365f7310129f32b49efb263a8247b0a93d8840 100644 --- a/doc/administration/geo/setup/index.md +++ b/doc/administration/geo/setup/index.md @@ -26,6 +26,7 @@ If you installed GitLab using the Omnibus packages (highly recommended): 1. [Configure GitLab](../replication/configuration.md) to set the **primary** and **secondary** site(s). 1. Optional: [Configure a secondary LDAP server](../../auth/ldap/index.md) for the **secondary** site(s). See [notes on LDAP](../index.md#ldap). 1. Follow the [Using a Geo Site](../replication/usage.md) guide. +1. [Configure Geo secondary proxying](../secondary_proxy/index.md) to use a single, unified URL for all Geo sites. This step is recommended to accelerate most read requests while transparently proxying writes to the primary Geo site. ## Post-installation documentation diff --git a/ee/config/feature_flags/development/geo_secondary_proxy.yml b/ee/config/feature_flags/development/geo_secondary_proxy_separate_urls.yml similarity index 73% rename from ee/config/feature_flags/development/geo_secondary_proxy.yml rename to ee/config/feature_flags/development/geo_secondary_proxy_separate_urls.yml index 23e648d265f9780094df6b5d7f81122c39f870c1..36eef1cd25101370c3b8f8600f75b77336e2e7e0 100644 --- a/ee/config/feature_flags/development/geo_secondary_proxy.yml +++ b/ee/config/feature_flags/development/geo_secondary_proxy_separate_urls.yml @@ -1,8 +1,8 @@ --- -name: geo_secondary_proxy -introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/56297 +name: geo_secondary_proxy_separate_urls +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/74329 rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/325732 -milestone: '13.11' +milestone: '14.5' type: development group: group::geo default_enabled: false diff --git a/ee/lib/ee/api/geo.rb b/ee/lib/ee/api/geo.rb index e62ecd1af1cd22bdd668029a613689308c8c042d..eaab38172802616635bc9d4c775d12c3d9dbdf6d 100644 --- a/ee/lib/ee/api/geo.rb +++ b/ee/lib/ee/api/geo.rb @@ -38,7 +38,9 @@ def geo_proxy_response # # The cached values are invalidated when changed. # - if ::Feature.enabled?(:geo_secondary_proxy, default_enabled: :yaml) && ::Gitlab::Geo.secondary_with_primary? + return super unless ::Gitlab::Geo.secondary_with_primary? + + if ::Gitlab::Geo.secondary_with_unified_url? || ::Feature.enabled?(:geo_secondary_proxy_separate_urls, default_enabled: :yaml) { geo_proxy_url: ::Gitlab::Geo.primary_node.internal_url } else super diff --git a/ee/lib/gitlab/geo.rb b/ee/lib/gitlab/geo.rb index e72abab8cb9c3d269884663a54a40ad8b5064b4f..63420c0c4a8187c0383c5b9c463fc5bb8d419db3 100644 --- a/ee/lib/gitlab/geo.rb +++ b/ee/lib/gitlab/geo.rb @@ -86,6 +86,10 @@ def self.secondary_with_primary? self.secondary? && self.primary_node_configured? end + def self.secondary_with_unified_url? + self.secondary_with_primary? && self.primary_node.url == self.current_node.url + end + def self.license_allows? ::License.feature_available?(:geo) end diff --git a/ee/spec/lib/gitlab/geo_spec.rb b/ee/spec/lib/gitlab/geo_spec.rb index 73bf5a56b546e0c21e8f1c8cee37b17ed7a3e3e5..80748b0a6c89be4edd278aefd3bf9c4ce282f5c3 100644 --- a/ee/spec/lib/gitlab/geo_spec.rb +++ b/ee/spec/lib/gitlab/geo_spec.rb @@ -120,6 +120,76 @@ expect(described_class.secondary?).to be_falsey end end + + context 'when current node is a primary node' do + it 'returns false' do + expect(described_class.secondary?).to be_falsey + end + end + end + + describe '.secondary_with_primary?' do + context 'when current node is a primary node' do + it 'returns false' do + expect(described_class.secondary_with_primary?).to be_falsey + end + end + + context 'when current node is a secondary node' do + before do + stub_current_geo_node(secondary_node) + end + + it 'returns true' do + expect(described_class.secondary_with_primary?).to be_truthy + end + + context 'when a primary does not exist' do + it 'returns false' do + allow(::Gitlab::Geo).to receive(:primary_node_configured?).and_return(false) + + expect(described_class.secondary_with_primary?).to be_falsey + end + end + end + end + + describe '.secondary_with_unified_url?' do + context 'when current node is a primary node' do + it 'returns false' do + expect(described_class.secondary_with_unified_url?).to be_falsey + end + end + + context 'when current node is a secondary node' do + before do + stub_current_geo_node(secondary_node) + end + + context 'when a primary does not exist' do + it 'returns false' do + allow(::Gitlab::Geo).to receive(:primary_node_configured?).and_return(false) + + expect(described_class.secondary_with_unified_url?).to be_falsey + end + end + + context 'when the secondary node has different URLs' do + it 'returns false' do + expect(described_class.secondary_with_unified_url?).to be_falsey + end + end + + context 'when the secondary node has unified URL' do + before do + stub_current_geo_node(create(:geo_node, url: primary_node.url)) + end + + it 'returns true' do + expect(described_class.secondary_with_unified_url?).to be_truthy + end + end + end end describe '.enabled?' do @@ -179,21 +249,6 @@ end end - describe '.secondary?' do - context 'when current node is secondary' do - it 'returns true' do - stub_current_geo_node(secondary_node) - expect(described_class.secondary?).to be_truthy - end - end - - context 'current node is primary' do - it 'returns false' do - expect(described_class.secondary?).to be_falsey - end - end - end - describe '.expire_cache!' do it 'clears the Geo cache keys', :request_store do described_class::CACHE_KEYS.each do |key| diff --git a/ee/spec/requests/api/geo_spec.rb b/ee/spec/requests/api/geo_spec.rb index 79121cd2d91ba712bf69967d9ced539820779768..46ee94ee908cdc45586de85b716108dafd4a7e30 100644 --- a/ee/spec/requests/api/geo_spec.rb +++ b/ee/spec/requests/api/geo_spec.rb @@ -671,9 +671,11 @@ end end - context 'when this is a secondary site' do + context 'when this is a secondary site with unified URL' do + let_it_be(:unified_url_secondary_node) { create(:geo_node, url: primary_node.url) } + before do - stub_current_geo_node(secondary_node) + stub_current_geo_node(unified_url_secondary_node) end context 'when a primary exists' do @@ -697,16 +699,42 @@ end end - context 'when geo_secondary_proxy feature flag is disabled' do + context 'when this is a secondary site with separate URLs' do before do - stub_feature_flags(geo_secondary_proxy: false) + stub_current_geo_node(secondary_node) end - it 'returns empty data' do - subject + context 'when a primary does not exist' do + it 'returns empty data' do + allow(::Gitlab::Geo).to receive(:primary_node_configured?).and_return(false) - expect(response).to have_gitlab_http_status(:ok) - expect(json_response).to be_empty + subject + + expect(response).to have_gitlab_http_status(:ok) + expect(json_response).to be_empty + end + end + + context 'when geo_secondary_proxy_separate_urls feature flag is disabled' do + before do + stub_feature_flags(geo_secondary_proxy_separate_urls: false) + end + + it 'returns empty data' do + subject + + expect(response).to have_gitlab_http_status(:ok) + expect(json_response).to be_empty + end + end + + context 'when geo_secondary_proxy_separate_urls feature flag is enabled' do + it 'returns the primary internal URL' do + subject + + expect(response).to have_gitlab_http_status(:ok) + expect(json_response['geo_proxy_url']).to match(primary_node.internal_url) + end end end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 25759ca50b80e317682c92f2d12fefddb6d33472..0128a6334b3d51bbf0273bbde6cdf45547afefbc 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -475,3 +475,7 @@ # Initialize FactoryDefault to use create_default helper TestProf::FactoryDefault.init + +# Exclude the Geo proxy API request from getting on_next_request Warden handlers, +# necessary to prevent race conditions with feature tests not getting authenticated. +::Warden.asset_paths << %r{^/api/v4/geo/proxy$} diff --git a/workhorse/internal/upstream/upstream.go b/workhorse/internal/upstream/upstream.go index e57f58d59ddb06e8968fff946550ec0080903c2f..6835569dfa89c4534d79d482842ffbb5015d383e 100644 --- a/workhorse/internal/upstream/upstream.go +++ b/workhorse/internal/upstream/upstream.go @@ -65,7 +65,7 @@ func newUpstream(cfg config.Config, accessLogger *logrus.Logger, routesCallback Config: cfg, accessLogger: accessLogger, // Kind of a feature flag. See https://gitlab.com/groups/gitlab-org/-/epics/5914#note_564974130 - enableGeoProxyFeature: os.Getenv("GEO_SECONDARY_PROXY") == "1", + enableGeoProxyFeature: os.Getenv("GEO_SECONDARY_PROXY") != "0", geoProxyBackend: &url.URL{}, } if up.geoProxyPollSleep == nil { @@ -207,8 +207,8 @@ func (u *upstream) findGeoProxyRoute(cleanedPath string, r *http.Request) *route func (u *upstream) pollGeoProxyAPI() { for { - u.callGeoProxyAPI() u.geoProxyPollSleep(geoProxyApiPollingInterval) + u.callGeoProxyAPI() } } diff --git a/workhorse/internal/upstream/upstream_test.go b/workhorse/internal/upstream/upstream_test.go index 53c15bb7e91f264afd051a712ed87a60806b1a98..4d8efed2b73650a14cd3dfff083e9ad896c72dd4 100644 --- a/workhorse/internal/upstream/upstream_test.go +++ b/workhorse/internal/upstream/upstream_test.go @@ -310,5 +310,9 @@ func startWorkhorseServer(railsServerURL string, enableGeoProxyFeature bool) (*h } } + // Since the first sleep happens before any API call, this ensures + // we call the API at least once. + waitForNextApiPoll() + return ws, ws.Close, waitForNextApiPoll }