From cb46ee6624878f9a34fdf87b183c1bd94ceafeaf Mon Sep 17 00:00:00 2001 From: ghinfey <ghinfey@gitlab.com> Date: Wed, 15 May 2024 10:27:06 +0100 Subject: [PATCH] Add base64_encoded_blob field Add a GraphQL field that returns the blob content encoded as Base64. --- app/graphql/types/repository/blob_type.rb | 5 ++-- app/presenters/blob_presenter.rb | 4 +-- doc/api/graphql/reference/index.md | 2 +- lib/gitlab/encoding_helper.rb | 10 ------- .../types/repository/blob_type_spec.rb | 2 +- spec/lib/gitlab/encoding_helper_spec.rb | 27 ------------------- spec/presenters/blob_presenter_spec.rb | 11 +++++--- 7 files changed, 14 insertions(+), 47 deletions(-) diff --git a/app/graphql/types/repository/blob_type.rb b/app/graphql/types/repository/blob_type.rb index ee41f4b24a43e..a5c9d6940ceb6 100644 --- a/app/graphql/types/repository/blob_type.rb +++ b/app/graphql/types/repository/blob_type.rb @@ -53,9 +53,8 @@ class BlobType < BaseObject field :raw_blob, GraphQL::Types::String, null: true, method: :data, description: 'Raw content of the blob.' - field :unicode_escaped_blob, GraphQL::Types::String, null: true, method: :unicode_escaped_blob, - alpha: { milestone: '16.11' }, description: 'Raw content of the blob where invalid UTF-8 characters are - escaped to unicode. Returns `null` if the `unicode_escaped_data` feature flag is disabled.' + field :base64_encoded_blob, GraphQL::Types::String, null: true, method: :base64_encoded_blob, + alpha: { milestone: '17.1' }, description: 'Content of blob is encoded base64. Returns `null` if the `unicode_escaped_blob` feature flag is disabled.' field :raw_text_blob, GraphQL::Types::String, null: true, method: :text_only_data, description: 'Raw content of the blob, if the blob is text data.' 
diff --git a/app/presenters/blob_presenter.rb b/app/presenters/blob_presenter.rb index 66d47d675025d..b42e769d349f3 100644 --- a/app/presenters/blob_presenter.rb +++ b/app/presenters/blob_presenter.rb @@ -95,10 +95,10 @@ def blame_path url_helpers.project_blame_path(*path_params) end - def unicode_escaped_blob + def base64_encoded_blob return unless Feature.enabled?(:unicode_escaped_blob) - encode_uft8_with_unicode_escaping(blob.raw) + Base64.encode64(blob.raw) end def history_path diff --git a/doc/api/graphql/reference/index.md b/doc/api/graphql/reference/index.md index b8b8a0114e677..112b706da9a0e 100644 --- a/doc/api/graphql/reference/index.md +++ b/doc/api/graphql/reference/index.md @@ -29304,6 +29304,7 @@ Returns [`RepositoryCodeownerValidation`](#repositorycodeownervalidation). | Name | Type | Description | | ---- | ---- | ----------- | | <a id="repositoryblobarchived"></a>`archived` | [`Boolean`](#boolean) | Whether the current project is archived. | +| <a id="repositoryblobbase64encodedblob"></a>`base64EncodedBlob` **{warning-solid}** | [`String`](#string) | **Introduced** in GitLab 17.1. **Status**: Experiment. Content of blob is encoded base64. Returns `null` if the `unicode_escaped_blob` feature flag is disabled. | | <a id="repositoryblobblamepath"></a>`blamePath` | [`String`](#string) | Web path to blob blame page. | | <a id="repositoryblobcancurrentuserpushtobranch"></a>`canCurrentUserPushToBranch` | [`Boolean`](#boolean) | Whether the current user can push to the branch. | | <a id="repositoryblobcanmodifyblob"></a>`canModifyBlob` | [`Boolean`](#boolean) | Whether the current user can modify the blob. | @@ -29342,7 +29343,6 @@ Returns [`RepositoryCodeownerValidation`](#repositorycodeownervalidation). | <a id="repositoryblobsimpleviewer"></a>`simpleViewer` | [`BlobViewer!`](#blobviewer) | Blob content simple viewer. | | <a id="repositoryblobsize"></a>`size` | [`BigInt`](#bigint) | Size (in bytes) of the blob. 
| | <a id="repositoryblobstoredexternally"></a>`storedExternally` | [`Boolean`](#boolean) | Whether the blob's content is stored externally (for instance, in LFS). | -| <a id="repositoryblobunicodeescapedblob"></a>`unicodeEscapedBlob` **{warning-solid}** | [`String`](#string) | **Introduced** in GitLab 16.11. **Status**: Experiment. Raw content of the blob where invalid UTF-8 characters are escaped to unicode. Returns `null` if the `unicode_escaped_data` feature flag is disabled. | | <a id="repositoryblobwebpath"></a>`webPath` | [`String`](#string) | Web path of the blob. | #### Fields with arguments diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb index bec19fc56af31..e6cbf3b1dff0e 100644 --- a/lib/gitlab/encoding_helper.rb +++ b/lib/gitlab/encoding_helper.rb @@ -85,16 +85,6 @@ def encode_utf8_with_escaping!(message) message.replace clean(message) end - # Escapes unsupported UTF-8 characters with their equivalent unicode. - def encode_uft8_with_unicode_escaping(message) - encoded_message = force_encode_utf8(message.dup) - return encoded_message if encoded_message.valid_encoding? - - encoded_message = encoded_message.chars.map { |char| char.valid_encoding? ? char : escape_unicode(char) }.join - - clean(encoded_message) - end - def encode_utf8(message, replace: "") message = force_encode_utf8(message) return message if message.valid_encoding? 
diff --git a/spec/graphql/types/repository/blob_type_spec.rb b/spec/graphql/types/repository/blob_type_spec.rb index cb634d6948023..1c27b6fca503f 100644 --- a/spec/graphql/types/repository/blob_type_spec.rb +++ b/spec/graphql/types/repository/blob_type_spec.rb @@ -19,7 +19,7 @@ :size, :raw_size, :raw_blob, - :unicode_escaped_blob, + :base64_encoded_blob, :raw_text_blob, :file_type, :edit_blob_path, diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb index 3ec26ca5ab5e3..db7961fc0c9ad 100644 --- a/spec/lib/gitlab/encoding_helper_spec.rb +++ b/spec/lib/gitlab/encoding_helper_spec.rb @@ -116,33 +116,6 @@ end end - describe '#encode_uft8_with_unicode_escaping!' do - where(:input, :expected) do - "abcd" | "abcd" - "ðŸ¤ðŸ¤ðŸ¤ðŸ¤\xF0\x9F\x90" | "ðŸ¤ðŸ¤ðŸ¤ðŸ¤\\u00f0\\u009f\\u0090" - "\xD0\x9F\xD1\x80 \x90" | "Пр \\u0090" - "abcd \xE9efgh" | "abcd \\u00e9efgh" - "\xFE\xFF\x00\x41BC" | "\\u00fe\\u00ffABC" # An "ABC" prepended with UTF-16-BE BOM - "\xFF\xFE\x00\ABC\xE9" | "\\u00ff\\u00feABC\\u00e9" # An "ABC" prepended with UTF-16-LE BOM and an added e-acute. 
- end - - with_them do - it 'escapes to unicode' do - expect(ext_class.encode_uft8_with_unicode_escaping(input.dup.force_encoding(Encoding::UTF_16BE))).to eq(expected) - expect(ext_class.encode_uft8_with_unicode_escaping(input.dup.force_encoding(Encoding::UTF_8))).to eq(expected) - expect(ext_class.encode_uft8_with_unicode_escaping(input.dup.force_encoding(Encoding::UTF_16BE))).to eq(expected) - expect(ext_class.encode_uft8_with_unicode_escaping(input.dup.force_encoding(Encoding::WINDOWS_1252))).to eq(expected) - expect(ext_class.encode_uft8_with_unicode_escaping(input)).to eq(expected) - end - end - - it 'does not mutate the input message' do - input = "ðŸ¤ðŸ¤ðŸ¤ðŸ¤\xF0\x9F\x90" - ext_class.encode_uft8_with_unicode_escaping(input) - expect(input).to eq("ðŸ¤ðŸ¤ðŸ¤ðŸ¤\xF0\x9F\x90") - end - end - describe '#encode_utf8' do [ ["nil", nil, nil], diff --git a/spec/presenters/blob_presenter_spec.rb b/spec/presenters/blob_presenter_spec.rb index 9502556d40809..f9978749221d2 100644 --- a/spec/presenters/blob_presenter_spec.rb +++ b/spec/presenters/blob_presenter_spec.rb @@ -390,12 +390,17 @@ end end - describe '#unicode_escaped_blob' do + describe '#base64_encoded_blob' do let(:blob) { repository.blob_at('HEAD', file) } let(:file) { 'files/ruby/popen.rb' } it 'does not include html in the content' do - expect(presenter.unicode_escaped_blob.include?('</span>')).to be_falsey + expect(presenter.base64_encoded_blob.include?('</span>')).to be_falsey + end + + it 'encodes the raw blob base 64' do + expect(presenter.base64_encoded_blob).to include("cmVxdWlyZSAnZmlsZXV0") + expect(presenter.base64_encoded_blob).to include("R1cwogIGVuZAplbmQK\n") end context 'when ff unicode_escaped_blob is disabled' do @@ -404,7 +409,7 @@ end it 'returns nil' do - expect(presenter.unicode_escaped_blob).to be_nil + expect(presenter.base64_encoded_blob).to be_nil end end end -- GitLab