From 7c4794bca3e72c3eaed5d304fd4a4d265bbd463e Mon Sep 17 00:00:00 2001
From: Stan Hu <stanhu@gmail.com>
Date: Thu, 6 Aug 2020 23:49:23 -0700
Subject: [PATCH] Add Azure Blob Storage support

This uses our custom Azure gem
(https://gitlab.com/gitlab-org/gitlab-fog-azure-rm) to integrate direct
upload access with GitLab.

Because the Azure Put Blob API does not work with chunked encoding,
uploads cannot be streamed directly via a pre-signed URL without saving
to disk first. To make this work without that, we need to add an Azure
client directly in Workhorse that uses the Put Block and Put Block List
API.

The Workhorse client is implemented in
https://gitlab.com/gitlab-org/gitlab-workhorse/-/merge_requests/555. We
use the Go Cloud Development Kit to generate a URL
(e.g. `azblob://container`) that can be extended for other object
storage providers.

Part of https://gitlab.com/gitlab-org/gitlab/-/issues/25877
---
 Gemfile                                       |  1 +
 Gemfile.lock                                  | 29 +++++++++++++
 .../unreleased/sh-add-azure-blob-support.yml  |  5 +++
 config/initializers/carrierwave_patch.rb      | 31 +++++++++++++
 config/initializers/direct_upload_support.rb  | 20 ++++++---
 lib/object_storage/config.rb                  |  4 ++
 lib/object_storage/direct_upload.rb           | 25 ++++++++++-
 spec/initializers/carrierwave_patch_spec.rb   | 32 ++++++++++++++
 .../direct_upload_support_spec.rb             | 12 +++++-
 spec/lib/object_storage/direct_upload_spec.rb | 43 ++++++++++++++++++-
 10 files changed, 193 insertions(+), 9 deletions(-)
 create mode 100644 changelogs/unreleased/sh-add-azure-blob-support.yml
 create mode 100644 spec/initializers/carrierwave_patch_spec.rb

diff --git a/Gemfile b/Gemfile
index d24850274d43..391011884540 100644
--- a/Gemfile
+++ b/Gemfile
@@ -119,6 +119,7 @@ gem 'fog-local', '~> 0.6'
 gem 'fog-openstack', '~> 1.0'
 gem 'fog-rackspace', '~> 0.1.1'
 gem 'fog-aliyun', '~> 0.3'
+gem 'gitlab-fog-azure-rm', '~> 0.7', require: false
 
 # for Google storage
 gem 'google-api-client', '~> 0.33'
diff --git a/Gemfile.lock b/Gemfile.lock
index aa786f37ca06..1202f3f4cd99 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -112,6 +112,15 @@ GEM
       aws-sigv4 (~> 1.1)
     aws-sigv4 (1.2.1)
       aws-eventstream (~> 1, >= 1.0.2)
+    azure-core (0.1.15)
+      faraday (~> 0.9)
+      faraday_middleware (~> 0.10)
+      nokogiri (~> 1.6)
+    azure-storage (0.15.0.preview)
+      azure-core (~> 0.1)
+      faraday (~> 0.9)
+      faraday_middleware (~> 0.10)
+      nokogiri (~> 1.6, >= 1.6.8)
     babosa (1.0.2)
     base32 (0.3.2)
     batch-loader (1.4.0)
@@ -313,6 +322,9 @@ GEM
       railties (>= 4.2.0)
     faraday (0.17.3)
       multipart-post (>= 1.2, < 3)
+    faraday-cookie_jar (0.0.6)
+      faraday (>= 0.7.4)
+      http-cookie (~> 1.0.0)
     faraday-http-cache (2.0.0)
       faraday (~> 0.8)
     faraday_middleware (0.14.0)
@@ -407,6 +419,12 @@ GEM
     github-markup (1.7.0)
     gitlab-chronic (0.10.5)
       numerizer (~> 0.2)
+    gitlab-fog-azure-rm (0.7.0)
+      azure-storage (~> 0.15.0.preview)
+      fog-core (= 2.1.0)
+      fog-json (~> 1.2.0)
+      mime-types
+      ms_rest_azure (~> 0.12.0)
     gitlab-labkit (0.12.1)
       actionpack (>= 5.0.0, < 6.1.0)
       activesupport (>= 5.0.0, < 6.1.0)
@@ -668,6 +686,15 @@ GEM
     mini_mime (1.0.2)
     mini_portile2 (2.4.0)
     minitest (5.11.3)
+    ms_rest (0.7.6)
+      concurrent-ruby (~> 1.0)
+      faraday (>= 0.9, < 2.0.0)
+      timeliness (~> 0.3.10)
+    ms_rest_azure (0.12.0)
+      concurrent-ruby (~> 1.0)
+      faraday (>= 0.9, < 2.0.0)
+      faraday-cookie_jar (~> 0.0.6)
+      ms_rest (~> 0.7.6)
     msgpack (1.3.1)
     multi_json (1.14.1)
     multi_xml (0.6.0)
@@ -1104,6 +1131,7 @@ GEM
     thrift (0.11.0.0)
     tilt (2.0.10)
     timecop (0.9.1)
+    timeliness (0.3.10)
     timfel-krb5-auth (0.8.3)
     toml (0.2.0)
       parslet (~> 1.8.0)
@@ -1275,6 +1303,7 @@ DEPENDENCIES
   gitaly (~> 13.3.0.pre.rc1)
   github-markup (~> 1.7.0)
   gitlab-chronic (~> 0.10.5)
+  gitlab-fog-azure-rm (~> 0.7)
   gitlab-labkit (= 0.12.1)
   gitlab-license (~> 1.0)
   gitlab-mail_room (~> 0.0.6)
diff --git a/changelogs/unreleased/sh-add-azure-blob-support.yml b/changelogs/unreleased/sh-add-azure-blob-support.yml
new file mode 100644
index 000000000000..32af5deef20b
--- /dev/null
+++ b/changelogs/unreleased/sh-add-azure-blob-support.yml
@@ -0,0 +1,5 @@
+---
+title: Add Azure Blob Storage support
+merge_request: 38882
+author:
+type: added
diff --git a/config/initializers/carrierwave_patch.rb b/config/initializers/carrierwave_patch.rb
index 94a79e5990d9..53fba307926e 100644
--- a/config/initializers/carrierwave_patch.rb
+++ b/config/initializers/carrierwave_patch.rb
@@ -4,6 +4,12 @@
 
 # This pulls in https://github.com/carrierwaveuploader/carrierwave/pull/2504 to support
 # sending AWS S3 encryption headers when copying objects.
+#
+# This patch also incorporates
+# https://github.com/carrierwaveuploader/carrierwave/pull/2375 to
+# provide Azure support. This is already in CarrierWave v2.1.x, but
+# upgrading this gem is a significant task:
+# https://gitlab.com/gitlab-org/gitlab/-/issues/216067
 module CarrierWave
   module Storage
     class Fog < Abstract
@@ -16,6 +22,31 @@ def copy_to(new_path)
         def copy_to_options
           acl_header.merge(@uploader.fog_attributes)
         end
+
+        def authenticated_url(options = {})
+          if %w[AWS Google Rackspace OpenStack AzureRM].include?(@uploader.fog_credentials[:provider])
+            # avoid a get by using local references
+            local_directory = connection.directories.new(key: @uploader.fog_directory)
+            local_file = local_directory.files.new(key: path)
+            expire_at = ::Fog::Time.now + @uploader.fog_authenticated_url_expiration
+            case @uploader.fog_credentials[:provider]
+            when 'AWS', 'Google'
+              # Older versions of fog-google do not support options as a parameter
+              if url_options_supported?(local_file)
+                local_file.url(expire_at, options)
+              else
+                warn "Options hash not supported in #{local_file.class}. You may need to upgrade your Fog provider."
+                local_file.url(expire_at)
+              end
+            when 'Rackspace'
+              connection.get_object_https_url(@uploader.fog_directory, path, expire_at, options)
+            when 'OpenStack'
+              connection.get_object_https_url(@uploader.fog_directory, path, expire_at)
+            else
+              local_file.url(expire_at)
+            end
+          end
+        end
       end
     end
   end
diff --git a/config/initializers/direct_upload_support.rb b/config/initializers/direct_upload_support.rb
index 0fc6e82207e3..94e90727f0ce 100644
--- a/config/initializers/direct_upload_support.rb
+++ b/config/initializers/direct_upload_support.rb
@@ -1,5 +1,5 @@
 class DirectUploadsValidator
-  SUPPORTED_DIRECT_UPLOAD_PROVIDERS = %w(Google AWS).freeze
+  SUPPORTED_DIRECT_UPLOAD_PROVIDERS = %w(Google AWS AzureRM).freeze
 
   ValidationError = Class.new(StandardError)
 
@@ -13,22 +13,32 @@ def verify!(uploader_type, object_store)
 
     raise ValidationError, "No provider configured for '#{uploader_type}'. #{supported_provider_text}" if provider.blank?
 
-    return if SUPPORTED_DIRECT_UPLOAD_PROVIDERS.include?(provider)
+    return if provider_loaded?(provider)
 
     raise ValidationError, "Object storage provider '#{provider}' is not supported " \
                            "when 'direct_upload' is used for '#{uploader_type}'. #{supported_provider_text}"
   end
 
+  private
+
+  def provider_loaded?(provider)
+    return false unless SUPPORTED_DIRECT_UPLOAD_PROVIDERS.include?(provider)
+
+    require 'fog/azurerm' if provider == 'AzureRM'
+
+    true
+  end
+
   def supported_provider_text
-    "Only #{SUPPORTED_DIRECT_UPLOAD_PROVIDERS.join(', ')} are supported."
+    "Only #{SUPPORTED_DIRECT_UPLOAD_PROVIDERS.to_sentence} are supported."
   end
 end
 
 DirectUploadsValidator.new.tap do |validator|
   CONFIGS = {
     artifacts: Gitlab.config.artifacts,
-    uploads: Gitlab.config.uploads,
-    lfs: Gitlab.config.lfs
+    lfs: Gitlab.config.lfs,
+    uploads: Gitlab.config.uploads
   }.freeze
 
   CONFIGS.each do |uploader_type, uploader|
diff --git a/lib/object_storage/config.rb b/lib/object_storage/config.rb
index 489b85104d6c..04da163ea8dd 100644
--- a/lib/object_storage/config.rb
+++ b/lib/object_storage/config.rb
@@ -58,6 +58,10 @@ def google?
       provider == 'Google'
     end
 
+    def azure?
+      provider == 'AzureRM'
+    end
+
     def fog_attributes
       @fog_attributes ||= begin
         return {} unless enabled? && aws?
diff --git a/lib/object_storage/direct_upload.rb b/lib/object_storage/direct_upload.rb
index 5784a089bbac..90199114f2c5 100644
--- a/lib/object_storage/direct_upload.rb
+++ b/lib/object_storage/direct_upload.rb
@@ -62,8 +62,16 @@ def multipart_upload_hash
     end
 
     def workhorse_client_hash
-      return {} unless config.aws?
+      if config.aws?
+        workhorse_aws_hash
+      elsif config.azure?
+        workhorse_azure_hash
+      else
+        {}
+      end
+    end
 
+    def workhorse_aws_hash
       {
         UseWorkhorseClient: use_workhorse_s3_client?,
         RemoteTempObjectID: object_name,
@@ -82,6 +90,21 @@ def workhorse_client_hash
       }
     end
 
+    def workhorse_azure_hash
+      {
+        # Azure requires Workhorse client because direct uploads can't
+        # use pre-signed URLs without buffering the whole file to disk.
+        UseWorkhorseClient: true,
+        RemoteTempObjectID: object_name,
+        ObjectStorage: {
+          Provider: 'AzureRM',
+          GoCloudConfig: {
+            URL: "azblob://#{bucket_name}"
+          }
+        }
+      }
+    end
+
     def use_workhorse_s3_client?
       return false unless Feature.enabled?(:use_workhorse_s3_client, default_enabled: true)
       return false unless config.use_iam_profile? || config.consolidated_settings?
diff --git a/spec/initializers/carrierwave_patch_spec.rb b/spec/initializers/carrierwave_patch_spec.rb
new file mode 100644
index 000000000000..d577eca2ac7e
--- /dev/null
+++ b/spec/initializers/carrierwave_patch_spec.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe 'CarrierWave::Storage::Fog::File' do
+  let(:uploader_class) { Class.new(CarrierWave::Uploader::Base) }
+  let(:uploader) { uploader_class.new }
+  let(:storage) { CarrierWave::Storage::Fog.new(uploader) }
+  let(:azure_options) do
+    {
+      azure_storage_account_name: 'AZURE_ACCOUNT_NAME',
+      azure_storage_access_key: 'AZURE_ACCESS_KEY',
+      provider: 'AzureRM'
+    }
+  end
+
+  subject { CarrierWave::Storage::Fog::File.new(uploader, storage, 'test') }
+
+  before do
+    require 'fog/azurerm'
+    allow(uploader).to receive(:fog_credentials).and_return(azure_options)
+    Fog.mock!
+  end
+
+  describe '#authenticated_url' do
+    context 'with Azure' do
+      it 'has an authenticated URL' do
+        expect(subject.authenticated_url).to eq("https://sa.blob.core.windows.net/test_container/test_blob?token")
+      end
+    end
+  end
+end
diff --git a/spec/initializers/direct_upload_support_spec.rb b/spec/initializers/direct_upload_support_spec.rb
index aa77c0905c95..670deecb4f11 100644
--- a/spec/initializers/direct_upload_support_spec.rb
+++ b/spec/initializers/direct_upload_support_spec.rb
@@ -8,7 +8,7 @@
   end
 
   where(:config_name) do
-    %w(lfs artifacts uploads)
+    %w(artifacts lfs uploads)
   end
 
   with_them do
@@ -52,11 +52,19 @@
           end
         end
 
+        context 'when provider is AzureRM' do
+          let(:provider) { 'AzureRM' }
+
+          it 'succeeds' do
+            expect { subject }.not_to raise_error
+          end
+        end
+
         context 'when connection is empty' do
           let(:connection) { nil }
 
           it 'raises an error' do
-            expect { subject }.to raise_error "No provider configured for '#{config_name}'. Only Google, AWS are supported."
+            expect { subject }.to raise_error "No provider configured for '#{config_name}'. Only Google, AWS, and AzureRM are supported."
           end
         end
 
diff --git a/spec/lib/object_storage/direct_upload_spec.rb b/spec/lib/object_storage/direct_upload_spec.rb
index 7e0f31cbd23e..b11926aeb49c 100644
--- a/spec/lib/object_storage/direct_upload_spec.rb
+++ b/spec/lib/object_storage/direct_upload_spec.rb
@@ -105,7 +105,7 @@
     end
   end
 
-  describe '#to_hash' do
+  describe '#to_hash', :aggregate_failures do
     subject { direct_upload.to_hash }
 
     shared_examples 'a valid S3 upload' do
@@ -200,6 +200,21 @@
       end
     end
 
+    shared_examples 'a valid AzureRM upload' do
+      before do
+        require 'fog/azurerm'
+      end
+
+      it_behaves_like 'a valid upload'
+
+      it 'enables the Workhorse client' do
+        expect(subject[:UseWorkhorseClient]).to be true
+        expect(subject[:RemoteTempObjectID]).to eq(object_name)
+        expect(subject[:ObjectStorage][:Provider]).to eq('AzureRM')
+        expect(subject[:ObjectStorage][:GoCloudConfig]).to eq({ URL: "azblob://#{bucket_name}" })
+      end
+    end
+
     shared_examples 'a valid upload' do
       it "returns valid structure" do
         expect(subject).to have_key(:Timeout)
@@ -370,5 +385,31 @@
         it_behaves_like 'a valid upload without multipart data'
       end
     end
+
+    context 'when AzureRM is used' do
+      let(:credentials) do
+        {
+          provider: 'AzureRM',
+          azure_storage_account_name: 'azuretest',
+          azure_storage_access_key: 'ABCD1234'
+        }
+      end
+
+      let(:storage_url) { 'https://azuretest.blob.core.windows.net' }
+
+      context 'when length is known' do
+        let(:has_length) { true }
+
+        it_behaves_like 'a valid AzureRM upload'
+        it_behaves_like 'a valid upload without multipart data'
+      end
+
+      context 'when length is unknown' do
+        let(:has_length) { false }
+
+        it_behaves_like 'a valid AzureRM upload'
+        it_behaves_like 'a valid upload without multipart data'
+      end
+    end
   end
 end
-- 
GitLab