From 4eaa944cd12bff260b6e94c75e7c6dd3b9058f07 Mon Sep 17 00:00:00 2001
From: Stan Hu <stanhu@gmail.com>
Date: Fri, 17 Mar 2023 13:13:05 -0700
Subject: [PATCH] Fix Google CDN not encoding UTF-8 characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously if a job artifact were prefaced with "©️", those bytes were
stripped when rendering a Google CDN URL because the URL did not
properly encode the characters. This commit now uses
`Addressable::URI.encode_component` to do the
job. `CarrierWave::Utilities::Uri#encode_path` does the same thing.

Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/396945

Changelog: fixed
---
 app/uploaders/object_storage/cdn/google_cdn.rb      |  2 +-
 .../uploaders/object_storage/cdn/google_cdn_spec.rb | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/app/uploaders/object_storage/cdn/google_cdn.rb b/app/uploaders/object_storage/cdn/google_cdn.rb
index f1fe62e9db3f8..f39729357ed02 100644
--- a/app/uploaders/object_storage/cdn/google_cdn.rb
+++ b/app/uploaders/object_storage/cdn/google_cdn.rb
@@ -28,7 +28,7 @@ def signed_url(path, expiry: 10.minutes, params: {})
         expiration = (Time.current + expiry).utc.to_i
 
         uri = Addressable::URI.parse(cdn_url)
-        uri.path = path
+        uri.path = Addressable::URI.encode_component(path, Addressable::URI::CharacterClasses::PATH)
         # Use an Array to preserve order: Google CDN needs to have
         # Expires, KeyName, and Signature in that order or it will return a 403 error:
         # https://cloud.google.com/cdn/docs/troubleshooting-steps#signing
diff --git a/spec/uploaders/object_storage/cdn/google_cdn_spec.rb b/spec/uploaders/object_storage/cdn/google_cdn_spec.rb
index 184c664f6dca6..96413f622e80a 100644
--- a/spec/uploaders/object_storage/cdn/google_cdn_spec.rb
+++ b/spec/uploaders/object_storage/cdn/google_cdn_spec.rb
@@ -99,9 +99,10 @@
     let(:path) { '/path/to/file.txt' }
     let(:expiration) { (Time.current + 10.minutes).utc.to_i }
     let(:cdn_query_params) { "Expires=#{expiration}&KeyName=#{key_name}" }
+    let(:encoded_path) { Addressable::URI.encode_component(path, Addressable::URI::CharacterClasses::PATH) }
 
     def verify_signature(url, unsigned_url)
-      expect(url).to start_with("#{options[:url]}#{path}")
+      expect(url).to start_with("#{options[:url]}#{encoded_path}")
 
       uri = Addressable::URI.parse(url)
       query = uri.query_values
@@ -116,6 +117,16 @@ def verify_signature(url, unsigned_url)
       end
     end
 
+    context 'with UTF-8 characters in path' do
+      let(:path) { "/path/to/©️job🧪" }
+      let(:url) { subject.signed_url(path) }
+      let(:unsigned_url) { "#{options[:url]}#{encoded_path}?#{cdn_query_params}" }
+
+      it 'returns a valid signed URL' do
+        verify_signature(url, unsigned_url)
+      end
+    end
+
     context 'with default query parameters' do
       let(:url) { subject.signed_url(path) }
       let(:unsigned_url) { "#{options[:url]}#{path}?#{cdn_query_params}" }
-- 
GitLab