From cb46ee6624878f9a34fdf87b183c1bd94ceafeaf Mon Sep 17 00:00:00 2001
From: ghinfey <ghinfey@gitlab.com>
Date: Wed, 15 May 2024 10:27:06 +0100
Subject: [PATCH] Add base64_encoded_blob field

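Add a GraphQL field that returns the blob content Base64-encoded.
It replaces the experimental unicode_escaped_blob field and is gated by
the same unicode_escaped_blob feature flag.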
---
 app/graphql/types/repository/blob_type.rb     |  5 ++--
 app/presenters/blob_presenter.rb              |  4 +--
 doc/api/graphql/reference/index.md            |  2 +-
 lib/gitlab/encoding_helper.rb                 | 10 -------
 .../types/repository/blob_type_spec.rb        |  2 +-
 spec/lib/gitlab/encoding_helper_spec.rb       | 27 -------------------
 spec/presenters/blob_presenter_spec.rb        | 11 +++++---
 7 files changed, 14 insertions(+), 47 deletions(-)

diff --git a/app/graphql/types/repository/blob_type.rb b/app/graphql/types/repository/blob_type.rb
index ee41f4b24a43e..a5c9d6940ceb6 100644
--- a/app/graphql/types/repository/blob_type.rb
+++ b/app/graphql/types/repository/blob_type.rb
@@ -53,9 +53,8 @@ class BlobType < BaseObject
       field :raw_blob, GraphQL::Types::String, null: true, method: :data,
         description: 'Raw content of the blob.'
 
-      field :unicode_escaped_blob, GraphQL::Types::String, null: true, method: :unicode_escaped_blob,
-        alpha: { milestone: '16.11' }, description: 'Raw content of the blob where invalid UTF-8 characters are
-                  escaped to unicode. Returns `null` if the `unicode_escaped_data` feature flag is disabled.'
+      field :base64_encoded_blob, GraphQL::Types::String, null: true, method: :base64_encoded_blob,
+        alpha: { milestone: '17.1' }, description: 'Content of the blob, encoded as Base64. Returns `null` if the `unicode_escaped_blob` feature flag is disabled.'
 
       field :raw_text_blob, GraphQL::Types::String, null: true, method: :text_only_data,
         description: 'Raw content of the blob, if the blob is text data.'
diff --git a/app/presenters/blob_presenter.rb b/app/presenters/blob_presenter.rb
index 66d47d675025d..b42e769d349f3 100644
--- a/app/presenters/blob_presenter.rb
+++ b/app/presenters/blob_presenter.rb
@@ -95,10 +95,10 @@ def blame_path
     url_helpers.project_blame_path(*path_params)
   end
 
-  def unicode_escaped_blob
+  def base64_encoded_blob
     return unless Feature.enabled?(:unicode_escaped_blob)
 
-    encode_uft8_with_unicode_escaping(blob.raw)
+    Base64.encode64(blob.raw)
   end
 
   def history_path
diff --git a/doc/api/graphql/reference/index.md b/doc/api/graphql/reference/index.md
index b8b8a0114e677..112b706da9a0e 100644
--- a/doc/api/graphql/reference/index.md
+++ b/doc/api/graphql/reference/index.md
@@ -29304,6 +29304,7 @@ Returns [`RepositoryCodeownerValidation`](#repositorycodeownervalidation).
 | Name | Type | Description |
 | ---- | ---- | ----------- |
 | <a id="repositoryblobarchived"></a>`archived` | [`Boolean`](#boolean) | Whether the current project is archived. |
+| <a id="repositoryblobbase64encodedblob"></a>`base64EncodedBlob` **{warning-solid}** | [`String`](#string) | **Introduced** in GitLab 17.1. **Status**: Experiment. Content of the blob, encoded as Base64. Returns `null` if the `unicode_escaped_blob` feature flag is disabled. |
 | <a id="repositoryblobblamepath"></a>`blamePath` | [`String`](#string) | Web path to blob blame page. |
 | <a id="repositoryblobcancurrentuserpushtobranch"></a>`canCurrentUserPushToBranch` | [`Boolean`](#boolean) | Whether the current user can push to the branch. |
 | <a id="repositoryblobcanmodifyblob"></a>`canModifyBlob` | [`Boolean`](#boolean) | Whether the current user can modify the blob. |
@@ -29342,7 +29343,6 @@ Returns [`RepositoryCodeownerValidation`](#repositorycodeownervalidation).
 | <a id="repositoryblobsimpleviewer"></a>`simpleViewer` | [`BlobViewer!`](#blobviewer) | Blob content simple viewer. |
 | <a id="repositoryblobsize"></a>`size` | [`BigInt`](#bigint) | Size (in bytes) of the blob. |
 | <a id="repositoryblobstoredexternally"></a>`storedExternally` | [`Boolean`](#boolean) | Whether the blob's content is stored externally (for instance, in LFS). |
-| <a id="repositoryblobunicodeescapedblob"></a>`unicodeEscapedBlob` **{warning-solid}** | [`String`](#string) | **Introduced** in GitLab 16.11. **Status**: Experiment. Raw content of the blob where invalid UTF-8 characters are escaped to unicode. Returns `null` if the `unicode_escaped_data` feature flag is disabled. |
 | <a id="repositoryblobwebpath"></a>`webPath` | [`String`](#string) | Web path of the blob. |
 
 #### Fields with arguments
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
index bec19fc56af31..e6cbf3b1dff0e 100644
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@@ -85,16 +85,6 @@ def encode_utf8_with_escaping!(message)
       message.replace clean(message)
     end
 
-    # Escapes unsupported UTF-8 characters with their equivalent unicode.
-    def encode_uft8_with_unicode_escaping(message)
-      encoded_message = force_encode_utf8(message.dup)
-      return encoded_message if encoded_message.valid_encoding?
-
-      encoded_message = encoded_message.chars.map { |char| char.valid_encoding? ? char : escape_unicode(char) }.join
-
-      clean(encoded_message)
-    end
-
     def encode_utf8(message, replace: "")
       message = force_encode_utf8(message)
       return message if message.valid_encoding?
diff --git a/spec/graphql/types/repository/blob_type_spec.rb b/spec/graphql/types/repository/blob_type_spec.rb
index cb634d6948023..1c27b6fca503f 100644
--- a/spec/graphql/types/repository/blob_type_spec.rb
+++ b/spec/graphql/types/repository/blob_type_spec.rb
@@ -19,7 +19,7 @@
       :size,
       :raw_size,
       :raw_blob,
-      :unicode_escaped_blob,
+      :base64_encoded_blob,
       :raw_text_blob,
       :file_type,
       :edit_blob_path,
diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb
index 3ec26ca5ab5e3..db7961fc0c9ad 100644
--- a/spec/lib/gitlab/encoding_helper_spec.rb
+++ b/spec/lib/gitlab/encoding_helper_spec.rb
@@ -116,33 +116,6 @@
     end
   end
 
-  describe '#encode_uft8_with_unicode_escaping!' do
-    where(:input, :expected) do
-      "abcd" | "abcd"
-      "🐤🐤🐤🐤\xF0\x9F\x90" | "🐤🐤🐤🐤\\u00f0\\u009f\\u0090"
-      "\xD0\x9F\xD1\x80 \x90" | "Пр \\u0090"
-      "abcd \xE9efgh" | "abcd \\u00e9efgh"
-      "\xFE\xFF\x00\x41BC" | "\\u00fe\\u00ffABC" # An "ABC" prepended with UTF-16-BE BOM
-      "\xFF\xFE\x00\ABC\xE9" | "\\u00ff\\u00feABC\\u00e9" # An "ABC" prepended with UTF-16-LE BOM and an added e-acute.
-    end
-
-    with_them do
-      it 'escapes to unicode' do
-        expect(ext_class.encode_uft8_with_unicode_escaping(input.dup.force_encoding(Encoding::UTF_16BE))).to eq(expected)
-        expect(ext_class.encode_uft8_with_unicode_escaping(input.dup.force_encoding(Encoding::UTF_8))).to eq(expected)
-        expect(ext_class.encode_uft8_with_unicode_escaping(input.dup.force_encoding(Encoding::UTF_16BE))).to eq(expected)
-        expect(ext_class.encode_uft8_with_unicode_escaping(input.dup.force_encoding(Encoding::WINDOWS_1252))).to eq(expected)
-        expect(ext_class.encode_uft8_with_unicode_escaping(input)).to eq(expected)
-      end
-    end
-
-    it 'does not mutate the input message' do
-      input = "🐤🐤🐤🐤\xF0\x9F\x90"
-      ext_class.encode_uft8_with_unicode_escaping(input)
-      expect(input).to eq("🐤🐤🐤🐤\xF0\x9F\x90")
-    end
-  end
-
   describe '#encode_utf8' do
     [
       ["nil", nil, nil],
diff --git a/spec/presenters/blob_presenter_spec.rb b/spec/presenters/blob_presenter_spec.rb
index 9502556d40809..f9978749221d2 100644
--- a/spec/presenters/blob_presenter_spec.rb
+++ b/spec/presenters/blob_presenter_spec.rb
@@ -390,12 +390,17 @@
     end
   end
 
-  describe '#unicode_escaped_blob' do
+  describe '#base64_encoded_blob' do
     let(:blob) { repository.blob_at('HEAD', file) }
     let(:file) { 'files/ruby/popen.rb' }
 
     it 'does not include html in the content' do
-      expect(presenter.unicode_escaped_blob.include?('</span>')).to be_falsey
+      expect(presenter.base64_encoded_blob.include?('</span>')).to be_falsey
+    end
+
+    it 'encodes the raw blob base 64' do
+      expect(presenter.base64_encoded_blob).to include("cmVxdWlyZSAnZmlsZXV0")
+      expect(presenter.base64_encoded_blob).to include("R1cwogIGVuZAplbmQK\n")
     end
 
     context 'when ff unicode_escaped_blob is disabled' do
@@ -404,7 +409,7 @@
       end
 
       it 'returns nil' do
-        expect(presenter.unicode_escaped_blob).to be_nil
+        expect(presenter.base64_encoded_blob).to be_nil
       end
     end
   end
-- 
GitLab