From 7c4794bca3e72c3eaed5d304fd4a4d265bbd463e Mon Sep 17 00:00:00 2001 From: Stan Hu <stanhu@gmail.com> Date: Thu, 6 Aug 2020 23:49:23 -0700 Subject: [PATCH] Add Azure Blob Storage support This uses our custom Azure gem (https://gitlab.com/gitlab-org/gitlab-fog-azure-rm) to integrate direct upload access with GitLab. Because the Azure Put Blob API does not work with chunked encoding, uploads cannot be streamed directly via a pre-signed URL without saving to disk first. To make this work without that, we need to add an Azure client directly in Workhorse that uses the Put Block and Put Block List API. The Workhorse client is implemented in https://gitlab.com/gitlab-org/gitlab-workhorse/-/merge_requests/555. We use the Go Cloud Development Kit to generate a URL (e.g. `azblob://container`) that can be extended for other object storage providers. Part of https://gitlab.com/gitlab-org/gitlab/-/issues/25877 --- Gemfile | 1 + Gemfile.lock | 29 +++++++++++++ .../unreleased/sh-add-azure-blob-support.yml | 5 +++ config/initializers/carrierwave_patch.rb | 31 +++++++++++++ config/initializers/direct_upload_support.rb | 20 ++++++--- lib/object_storage/config.rb | 4 ++ lib/object_storage/direct_upload.rb | 25 ++++++++++- spec/initializers/carrierwave_patch_spec.rb | 32 ++++++++++++++ .../direct_upload_support_spec.rb | 12 +++++- spec/lib/object_storage/direct_upload_spec.rb | 43 ++++++++++++++++++- 10 files changed, 193 insertions(+), 9 deletions(-) create mode 100644 changelogs/unreleased/sh-add-azure-blob-support.yml create mode 100644 spec/initializers/carrierwave_patch_spec.rb diff --git a/Gemfile b/Gemfile index d24850274d43..391011884540 100644 --- a/Gemfile +++ b/Gemfile @@ -119,6 +119,7 @@ gem 'fog-local', '~> 0.6' gem 'fog-openstack', '~> 1.0' gem 'fog-rackspace', '~> 0.1.1' gem 'fog-aliyun', '~> 0.3' +gem 'gitlab-fog-azure-rm', '~> 0.7', require: false # for Google storage gem 'google-api-client', '~> 0.33' diff --git a/Gemfile.lock b/Gemfile.lock index aa786f37ca06..1202f3f4cd99 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -112,6 +112,15 @@ GEM aws-sigv4 (~> 1.1) aws-sigv4 (1.2.1) aws-eventstream (~> 1, >= 1.0.2) + azure-core (0.1.15) + faraday (~> 0.9) + faraday_middleware (~> 0.10) + nokogiri (~> 1.6) + azure-storage (0.15.0.preview) + azure-core (~> 0.1) + faraday (~> 0.9) + faraday_middleware (~> 0.10) + nokogiri (~> 1.6, >= 1.6.8) babosa (1.0.2) base32 (0.3.2) batch-loader (1.4.0) @@ -313,6 +322,9 @@ GEM railties (>= 4.2.0) faraday (0.17.3) multipart-post (>= 1.2, < 3) + faraday-cookie_jar (0.0.6) + faraday (>= 0.7.4) + http-cookie (~> 1.0.0) faraday-http-cache (2.0.0) faraday (~> 0.8) faraday_middleware (0.14.0) @@ -407,6 +419,12 @@ GEM github-markup (1.7.0) gitlab-chronic (0.10.5) numerizer (~> 0.2) + gitlab-fog-azure-rm (0.7.0) + azure-storage (~> 0.15.0.preview) + fog-core (= 2.1.0) + fog-json (~> 1.2.0) + mime-types + ms_rest_azure (~> 0.12.0) gitlab-labkit (0.12.1) actionpack (>= 5.0.0, < 6.1.0) activesupport (>= 5.0.0, < 6.1.0) @@ -668,6 +686,15 @@ GEM mini_mime (1.0.2) mini_portile2 (2.4.0) minitest (5.11.3) + ms_rest (0.7.6) + concurrent-ruby (~> 1.0) + faraday (>= 0.9, < 2.0.0) + timeliness (~> 0.3.10) + ms_rest_azure (0.12.0) + concurrent-ruby (~> 1.0) + faraday (>= 0.9, < 2.0.0) + faraday-cookie_jar (~> 0.0.6) + ms_rest (~> 0.7.6) msgpack (1.3.1) multi_json (1.14.1) multi_xml (0.6.0) @@ -1104,6 +1131,7 @@ GEM thrift (0.11.0.0) tilt (2.0.10) timecop (0.9.1) + timeliness (0.3.10) timfel-krb5-auth (0.8.3) toml (0.2.0) parslet (~> 1.8.0) @@ -1275,6 +1303,7 @@ DEPENDENCIES gitaly (~> 13.3.0.pre.rc1) github-markup (~> 1.7.0) gitlab-chronic (~> 0.10.5) + gitlab-fog-azure-rm (~> 0.7) gitlab-labkit (= 0.12.1) gitlab-license (~> 1.0) gitlab-mail_room (~> 0.0.6) diff --git a/changelogs/unreleased/sh-add-azure-blob-support.yml b/changelogs/unreleased/sh-add-azure-blob-support.yml new file mode 100644 index 000000000000..32af5deef20b --- /dev/null +++ b/changelogs/unreleased/sh-add-azure-blob-support.yml @@ -0,0 +1,5 @@ +--- +title: Add Azure Blob Storage support +merge_request: 38882 +author: +type: added diff --git a/config/initializers/carrierwave_patch.rb b/config/initializers/carrierwave_patch.rb index 94a79e5990d9..53fba307926e 100644 --- a/config/initializers/carrierwave_patch.rb +++ b/config/initializers/carrierwave_patch.rb @@ -4,6 +4,12 @@ # This pulls in https://github.com/carrierwaveuploader/carrierwave/pull/2504 to support # sending AWS S3 encryption headers when copying objects. +# +# This patch also incorporates +# https://github.com/carrierwaveuploader/carrierwave/pull/2375 to +# provide Azure support. This is already in CarrierWave v2.1.x, but +# upgrading this gem is a significant task: +# https://gitlab.com/gitlab-org/gitlab/-/issues/216067 module CarrierWave module Storage class Fog < Abstract @@ -16,6 +22,31 @@ def copy_to(new_path) def copy_to_options acl_header.merge(@uploader.fog_attributes) end + + def authenticated_url(options = {}) + if %w[AWS Google Rackspace OpenStack AzureRM].include?(@uploader.fog_credentials[:provider]) + # avoid a get by using local references + local_directory = connection.directories.new(key: @uploader.fog_directory) + local_file = local_directory.files.new(key: path) + expire_at = ::Fog::Time.now + @uploader.fog_authenticated_url_expiration + case @uploader.fog_credentials[:provider] + when 'AWS', 'Google' + # Older versions of fog-google do not support options as a parameter + if url_options_supported?(local_file) + local_file.url(expire_at, options) + else + warn "Options hash not supported in #{local_file.class}. You may need to upgrade your Fog provider." + local_file.url(expire_at) + end + when 'Rackspace' + connection.get_object_https_url(@uploader.fog_directory, path, expire_at, options) + when 'OpenStack' + connection.get_object_https_url(@uploader.fog_directory, path, expire_at) + else + local_file.url(expire_at) + end + end + end end end end diff --git a/config/initializers/direct_upload_support.rb b/config/initializers/direct_upload_support.rb index 0fc6e82207e3..94e90727f0ce 100644 --- a/config/initializers/direct_upload_support.rb +++ b/config/initializers/direct_upload_support.rb @@ -1,5 +1,5 @@ class DirectUploadsValidator - SUPPORTED_DIRECT_UPLOAD_PROVIDERS = %w(Google AWS).freeze + SUPPORTED_DIRECT_UPLOAD_PROVIDERS = %w(Google AWS AzureRM).freeze ValidationError = Class.new(StandardError) @@ -13,22 +13,32 @@ def verify!(uploader_type, object_store) raise ValidationError, "No provider configured for '#{uploader_type}'. #{supported_provider_text}" if provider.blank? - return if SUPPORTED_DIRECT_UPLOAD_PROVIDERS.include?(provider) + return if provider_loaded?(provider) raise ValidationError, "Object storage provider '#{provider}' is not supported " \ "when 'direct_upload' is used for '#{uploader_type}'. #{supported_provider_text}" end + private + + def provider_loaded?(provider) + return false unless SUPPORTED_DIRECT_UPLOAD_PROVIDERS.include?(provider) + + require 'fog/azurerm' if provider == 'AzureRM' + + true + end + def supported_provider_text - "Only #{SUPPORTED_DIRECT_UPLOAD_PROVIDERS.join(', ')} are supported." + "Only #{SUPPORTED_DIRECT_UPLOAD_PROVIDERS.to_sentence} are supported." end end DirectUploadsValidator.new.tap do |validator| CONFIGS = { artifacts: Gitlab.config.artifacts, - uploads: Gitlab.config.uploads, - lfs: Gitlab.config.lfs + lfs: Gitlab.config.lfs, + uploads: Gitlab.config.uploads }.freeze CONFIGS.each do |uploader_type, uploader| diff --git a/lib/object_storage/config.rb b/lib/object_storage/config.rb index 489b85104d6c..04da163ea8dd 100644 --- a/lib/object_storage/config.rb +++ b/lib/object_storage/config.rb @@ -58,6 +58,10 @@ def google? provider == 'Google' end + def azure? + provider == 'AzureRM' + end + def fog_attributes @fog_attributes ||= begin return {} unless enabled? && aws? diff --git a/lib/object_storage/direct_upload.rb b/lib/object_storage/direct_upload.rb index 5784a089bbac..90199114f2c5 100644 --- a/lib/object_storage/direct_upload.rb +++ b/lib/object_storage/direct_upload.rb @@ -62,8 +62,16 @@ def multipart_upload_hash end def workhorse_client_hash - return {} unless config.aws? + if config.aws? + workhorse_aws_hash + elsif config.azure? + workhorse_azure_hash + else + {} + end + end + def workhorse_aws_hash { UseWorkhorseClient: use_workhorse_s3_client?, RemoteTempObjectID: object_name, @@ -82,6 +90,21 @@ def workhorse_client_hash } end + def workhorse_azure_hash + { + # Azure requires Workhorse client because direct uploads can't + # use pre-signed URLs without buffering the whole file to disk. + UseWorkhorseClient: true, + RemoteTempObjectID: object_name, + ObjectStorage: { + Provider: 'AzureRM', + GoCloudConfig: { + URL: "azblob://#{bucket_name}" + } + } + } + end + def use_workhorse_s3_client? return false unless Feature.enabled?(:use_workhorse_s3_client, default_enabled: true) return false unless config.use_iam_profile? || config.consolidated_settings? diff --git a/spec/initializers/carrierwave_patch_spec.rb b/spec/initializers/carrierwave_patch_spec.rb new file mode 100644 index 000000000000..d577eca2ac7e --- /dev/null +++ b/spec/initializers/carrierwave_patch_spec.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'CarrierWave::Storage::Fog::File' do + let(:uploader_class) { Class.new(CarrierWave::Uploader::Base) } + let(:uploader) { uploader_class.new } + let(:storage) { CarrierWave::Storage::Fog.new(uploader) } + let(:azure_options) do + { + azure_storage_account_name: 'AZURE_ACCOUNT_NAME', + azure_storage_access_key: 'AZURE_ACCESS_KEY', + provider: 'AzureRM' + } + end + + subject { CarrierWave::Storage::Fog::File.new(uploader, storage, 'test') } + + before do + require 'fog/azurerm' + allow(uploader).to receive(:fog_credentials).and_return(azure_options) + Fog.mock! + end + + describe '#authenticated_url' do + context 'with Azure' do + it 'has an authenticated URL' do + expect(subject.authenticated_url).to eq("https://sa.blob.core.windows.net/test_container/test_blob?token") + end + end + end +end diff --git a/spec/initializers/direct_upload_support_spec.rb b/spec/initializers/direct_upload_support_spec.rb index aa77c0905c95..670deecb4f11 100644 --- a/spec/initializers/direct_upload_support_spec.rb +++ b/spec/initializers/direct_upload_support_spec.rb @@ -8,7 +8,7 @@ end where(:config_name) do - %w(lfs artifacts uploads) + %w(artifacts lfs uploads) end with_them do @@ -52,11 +52,19 @@ end end + context 'when provider is AzureRM' do + let(:provider) { 'AzureRM' } + + it 'succeeds' do + expect { subject }.not_to raise_error + end + end + context 'when connection is empty' do let(:connection) { nil } it 'raises an error' do - expect { subject }.to raise_error "No provider configured for '#{config_name}'. Only Google, AWS are supported." + expect { subject }.to raise_error "No provider configured for '#{config_name}'. Only Google, AWS, and AzureRM are supported." end end diff --git a/spec/lib/object_storage/direct_upload_spec.rb b/spec/lib/object_storage/direct_upload_spec.rb index 7e0f31cbd23e..b11926aeb49c 100644 --- a/spec/lib/object_storage/direct_upload_spec.rb +++ b/spec/lib/object_storage/direct_upload_spec.rb @@ -105,7 +105,7 @@ end end - describe '#to_hash' do + describe '#to_hash', :aggregate_failures do subject { direct_upload.to_hash } shared_examples 'a valid S3 upload' do @@ -200,6 +200,21 @@ end end + shared_examples 'a valid AzureRM upload' do + before do + require 'fog/azurerm' + end + + it_behaves_like 'a valid upload' + + it 'enables the Workhorse client' do + expect(subject[:UseWorkhorseClient]).to be true + expect(subject[:RemoteTempObjectID]).to eq(object_name) + expect(subject[:ObjectStorage][:Provider]).to eq('AzureRM') + expect(subject[:ObjectStorage][:GoCloudConfig]).to eq({ URL: "azblob://#{bucket_name}" }) + end + end + shared_examples 'a valid upload' do it "returns valid structure" do expect(subject).to have_key(:Timeout) @@ -370,5 +385,31 @@ it_behaves_like 'a valid upload without multipart data' end end + + context 'when AzureRM is used' do + let(:credentials) do + { + provider: 'AzureRM', + azure_storage_account_name: 'azuretest', + azure_storage_access_key: 'ABCD1234' + } + end + + let(:storage_url) { 'https://azuretest.blob.core.windows.net' } + + context 'when length is known' do + let(:has_length) { true } + + it_behaves_like 'a valid AzureRM upload' + it_behaves_like 'a valid upload without multipart data' + end + + context 'when length is unknown' do + let(:has_length) { false } + + it_behaves_like 'a valid AzureRM upload' + it_behaves_like 'a valid upload without multipart data' + end + end end end -- GitLab