diff --git a/Gemfile b/Gemfile index d24850274d43450b6aaea29da41637a1513b2b49..39101188454012ac33be9f42b8e36d84bb3f128c 100644 --- a/Gemfile +++ b/Gemfile @@ -119,6 +119,7 @@ gem 'fog-local', '~> 0.6' gem 'fog-openstack', '~> 1.0' gem 'fog-rackspace', '~> 0.1.1' gem 'fog-aliyun', '~> 0.3' +gem 'gitlab-fog-azure-rm', '~> 0.7', require: false # for Google storage gem 'google-api-client', '~> 0.33' diff --git a/Gemfile.lock b/Gemfile.lock index aa786f37ca062e502298342395b3ef4e46c59aa2..1202f3f4cd9951f061cf76647cffcf0968bf7711 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -112,6 +112,15 @@ GEM aws-sigv4 (~> 1.1) aws-sigv4 (1.2.1) aws-eventstream (~> 1, >= 1.0.2) + azure-core (0.1.15) + faraday (~> 0.9) + faraday_middleware (~> 0.10) + nokogiri (~> 1.6) + azure-storage (0.15.0.preview) + azure-core (~> 0.1) + faraday (~> 0.9) + faraday_middleware (~> 0.10) + nokogiri (~> 1.6, >= 1.6.8) babosa (1.0.2) base32 (0.3.2) batch-loader (1.4.0) @@ -313,6 +322,9 @@ GEM railties (>= 4.2.0) faraday (0.17.3) multipart-post (>= 1.2, < 3) + faraday-cookie_jar (0.0.6) + faraday (>= 0.7.4) + http-cookie (~> 1.0.0) faraday-http-cache (2.0.0) faraday (~> 0.8) faraday_middleware (0.14.0) @@ -407,6 +419,12 @@ GEM github-markup (1.7.0) gitlab-chronic (0.10.5) numerizer (~> 0.2) + gitlab-fog-azure-rm (0.7.0) + azure-storage (~> 0.15.0.preview) + fog-core (= 2.1.0) + fog-json (~> 1.2.0) + mime-types + ms_rest_azure (~> 0.12.0) gitlab-labkit (0.12.1) actionpack (>= 5.0.0, < 6.1.0) activesupport (>= 5.0.0, < 6.1.0) @@ -668,6 +686,15 @@ GEM mini_mime (1.0.2) mini_portile2 (2.4.0) minitest (5.11.3) + ms_rest (0.7.6) + concurrent-ruby (~> 1.0) + faraday (>= 0.9, < 2.0.0) + timeliness (~> 0.3.10) + ms_rest_azure (0.12.0) + concurrent-ruby (~> 1.0) + faraday (>= 0.9, < 2.0.0) + faraday-cookie_jar (~> 0.0.6) + ms_rest (~> 0.7.6) msgpack (1.3.1) multi_json (1.14.1) multi_xml (0.6.0) @@ -1104,6 +1131,7 @@ GEM thrift (0.11.0.0) tilt (2.0.10) timecop (0.9.1) + timeliness (0.3.10) timfel-krb5-auth (0.8.3) toml (0.2.0) parslet (~> 1.8.0) @@ -1275,6 +1303,7 @@ DEPENDENCIES gitaly (~> 13.3.0.pre.rc1) github-markup (~> 1.7.0) gitlab-chronic (~> 0.10.5) + gitlab-fog-azure-rm (~> 0.7) gitlab-labkit (= 0.12.1) gitlab-license (~> 1.0) gitlab-mail_room (~> 0.0.6) diff --git a/changelogs/unreleased/sh-add-azure-blob-support.yml b/changelogs/unreleased/sh-add-azure-blob-support.yml new file mode 100644 index 0000000000000000000000000000000000000000..32af5deef20bfb57d8e7da66906bc1cd18793c01 --- /dev/null +++ b/changelogs/unreleased/sh-add-azure-blob-support.yml @@ -0,0 +1,5 @@ +--- +title: Add Azure Blob Storage support +merge_request: 38882 +author: +type: added diff --git a/config/initializers/carrierwave_patch.rb b/config/initializers/carrierwave_patch.rb index 94a79e5990d9d0e69faf872a8fe036923f08a22e..53fba307926e5cefce637f6f7c4a02a34c3b39e6 100644 --- a/config/initializers/carrierwave_patch.rb +++ b/config/initializers/carrierwave_patch.rb @@ -4,6 +4,12 @@ # This pulls in https://github.com/carrierwaveuploader/carrierwave/pull/2504 to support # sending AWS S3 encryption headers when copying objects. +# +# This patch also incorporates +# https://github.com/carrierwaveuploader/carrierwave/pull/2375 to +# provide Azure support. This is already in CarrierWave v2.1.x, but +# upgrading this gem is a significant task: +# https://gitlab.com/gitlab-org/gitlab/-/issues/216067 module CarrierWave module Storage class Fog < Abstract @@ -16,6 +22,31 @@ def copy_to(new_path) def copy_to_options acl_header.merge(@uploader.fog_attributes) end + + def authenticated_url(options = {}) + if %w[AWS Google Rackspace OpenStack AzureRM].include?(@uploader.fog_credentials[:provider]) + # avoid a get by using local references + local_directory = connection.directories.new(key: @uploader.fog_directory) + local_file = local_directory.files.new(key: path) + expire_at = ::Fog::Time.now + @uploader.fog_authenticated_url_expiration + case @uploader.fog_credentials[:provider] + when 'AWS', 'Google' + # Older versions of fog-google do not support options as a parameter + if url_options_supported?(local_file) + local_file.url(expire_at, options) + else + warn "Options hash not supported in #{local_file.class}. You may need to upgrade your Fog provider." + local_file.url(expire_at) + end + when 'Rackspace' + connection.get_object_https_url(@uploader.fog_directory, path, expire_at, options) + when 'OpenStack' + connection.get_object_https_url(@uploader.fog_directory, path, expire_at) + else + local_file.url(expire_at) + end + end + end end end end diff --git a/config/initializers/direct_upload_support.rb b/config/initializers/direct_upload_support.rb index 0fc6e82207e37f9b79a3311dc2ee00dbe223ec7b..94e90727f0cefb1d4cb83303f3efb9269118c8e4 100644 --- a/config/initializers/direct_upload_support.rb +++ b/config/initializers/direct_upload_support.rb @@ -1,5 +1,5 @@ class DirectUploadsValidator - SUPPORTED_DIRECT_UPLOAD_PROVIDERS = %w(Google AWS).freeze + SUPPORTED_DIRECT_UPLOAD_PROVIDERS = %w(Google AWS AzureRM).freeze ValidationError = Class.new(StandardError) @@ -13,22 +13,32 @@ def verify!(uploader_type, object_store) raise ValidationError, "No provider configured for '#{uploader_type}'. #{supported_provider_text}" if provider.blank? - return if SUPPORTED_DIRECT_UPLOAD_PROVIDERS.include?(provider) + return if provider_loaded?(provider) raise ValidationError, "Object storage provider '#{provider}' is not supported " \ "when 'direct_upload' is used for '#{uploader_type}'. #{supported_provider_text}" end + private + + def provider_loaded?(provider) + return false unless SUPPORTED_DIRECT_UPLOAD_PROVIDERS.include?(provider) + + require 'fog/azurerm' if provider == 'AzureRM' + + true + end + def supported_provider_text - "Only #{SUPPORTED_DIRECT_UPLOAD_PROVIDERS.join(', ')} are supported." + "Only #{SUPPORTED_DIRECT_UPLOAD_PROVIDERS.to_sentence} are supported." end end DirectUploadsValidator.new.tap do |validator| CONFIGS = { artifacts: Gitlab.config.artifacts, - uploads: Gitlab.config.uploads, - lfs: Gitlab.config.lfs + lfs: Gitlab.config.lfs, + uploads: Gitlab.config.uploads }.freeze CONFIGS.each do |uploader_type, uploader| diff --git a/lib/object_storage/config.rb b/lib/object_storage/config.rb index 489b85104d6c6a2db5e3d868da5e7830c641e38d..04da163ea8dde416966307481305e13fcd07122e 100644 --- a/lib/object_storage/config.rb +++ b/lib/object_storage/config.rb @@ -58,6 +58,10 @@ def google? provider == 'Google' end + def azure? + provider == 'AzureRM' + end + def fog_attributes @fog_attributes ||= begin return {} unless enabled? && aws? diff --git a/lib/object_storage/direct_upload.rb b/lib/object_storage/direct_upload.rb index 5784a089bbac432db4f8691151931a8f688ecd05..90199114f2c51585b61d377687b19e98304e8b11 100644 --- a/lib/object_storage/direct_upload.rb +++ b/lib/object_storage/direct_upload.rb @@ -62,8 +62,16 @@ def multipart_upload_hash end def workhorse_client_hash - return {} unless config.aws? + if config.aws? + workhorse_aws_hash + elsif config.azure? + workhorse_azure_hash + else + {} + end + end + def workhorse_aws_hash { UseWorkhorseClient: use_workhorse_s3_client?, RemoteTempObjectID: object_name, @@ -82,6 +90,21 @@ def workhorse_client_hash } end + def workhorse_azure_hash + { + # Azure requires Workhorse client because direct uploads can't + # use pre-signed URLs without buffering the whole file to disk. + UseWorkhorseClient: true, + RemoteTempObjectID: object_name, + ObjectStorage: { + Provider: 'AzureRM', + GoCloudConfig: { + URL: "azblob://#{bucket_name}" + } + } + } + end + def use_workhorse_s3_client? return false unless Feature.enabled?(:use_workhorse_s3_client, default_enabled: true) return false unless config.use_iam_profile? || config.consolidated_settings? diff --git a/spec/initializers/carrierwave_patch_spec.rb b/spec/initializers/carrierwave_patch_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..d577eca2ac7eed1f562f0abb634ffa84283dde8e --- /dev/null +++ b/spec/initializers/carrierwave_patch_spec.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'CarrierWave::Storage::Fog::File' do + let(:uploader_class) { Class.new(CarrierWave::Uploader::Base) } + let(:uploader) { uploader_class.new } + let(:storage) { CarrierWave::Storage::Fog.new(uploader) } + let(:azure_options) do + { + azure_storage_account_name: 'AZURE_ACCOUNT_NAME', + azure_storage_access_key: 'AZURE_ACCESS_KEY', + provider: 'AzureRM' + } + end + + subject { CarrierWave::Storage::Fog::File.new(uploader, storage, 'test') } + + before do + require 'fog/azurerm' + allow(uploader).to receive(:fog_credentials).and_return(azure_options) + Fog.mock! + end + + describe '#authenticated_url' do + context 'with Azure' do + it 'has an authenticated URL' do + expect(subject.authenticated_url).to eq("https://sa.blob.core.windows.net/test_container/test_blob?token") + end + end + end +end diff --git a/spec/initializers/direct_upload_support_spec.rb b/spec/initializers/direct_upload_support_spec.rb index aa77c0905c958526da708160ac3627660a3da097..670deecb4f11851d921e52c0b3a20f76d59f065a 100644 --- a/spec/initializers/direct_upload_support_spec.rb +++ b/spec/initializers/direct_upload_support_spec.rb @@ -8,7 +8,7 @@ end where(:config_name) do - %w(lfs artifacts uploads) + %w(artifacts lfs uploads) end with_them do @@ -52,11 +52,19 @@ end end + context 'when provider is AzureRM' do + let(:provider) { 'AzureRM' } + + it 'succeeds' do + expect { subject }.not_to raise_error + end + end + context 'when connection is empty' do let(:connection) { nil } it 'raises an error' do - expect { subject }.to raise_error "No provider configured for '#{config_name}'. Only Google, AWS are supported." + expect { subject }.to raise_error "No provider configured for '#{config_name}'. Only Google, AWS, and AzureRM are supported." end end diff --git a/spec/lib/object_storage/direct_upload_spec.rb b/spec/lib/object_storage/direct_upload_spec.rb index 7e0f31cbd23ebeef5117e1ab66c0d1c22ab08d1e..b11926aeb49c944cf578a25f14320f4c30711ae4 100644 --- a/spec/lib/object_storage/direct_upload_spec.rb +++ b/spec/lib/object_storage/direct_upload_spec.rb @@ -105,7 +105,7 @@ end end - describe '#to_hash' do + describe '#to_hash', :aggregate_failures do subject { direct_upload.to_hash } shared_examples 'a valid S3 upload' do @@ -200,6 +200,21 @@ end end + shared_examples 'a valid AzureRM upload' do + before do + require 'fog/azurerm' + end + + it_behaves_like 'a valid upload' + + it 'enables the Workhorse client' do + expect(subject[:UseWorkhorseClient]).to be true + expect(subject[:RemoteTempObjectID]).to eq(object_name) + expect(subject[:ObjectStorage][:Provider]).to eq('AzureRM') + expect(subject[:ObjectStorage][:GoCloudConfig]).to eq({ URL: "azblob://#{bucket_name}" }) + end + end + shared_examples 'a valid upload' do it "returns valid structure" do expect(subject).to have_key(:Timeout) @@ -370,5 +385,31 @@ it_behaves_like 'a valid upload without multipart data' end end + + context 'when AzureRM is used' do + let(:credentials) do + { + provider: 'AzureRM', + azure_storage_account_name: 'azuretest', + azure_storage_access_key: 'ABCD1234' + } + end + + let(:storage_url) { 'https://azuretest.blob.core.windows.net' } + + context 'when length is known' do + let(:has_length) { true } + + it_behaves_like 'a valid AzureRM upload' + it_behaves_like 'a valid upload without multipart data' + end + + context 'when length is unknown' do + let(:has_length) { false } + + it_behaves_like 'a valid AzureRM upload' + it_behaves_like 'a valid upload without multipart data' + end + end end end