From 4eaa944cd12bff260b6e94c75e7c6dd3b9058f07 Mon Sep 17 00:00:00 2001 From: Stan Hu <stanhu@gmail.com> Date: Fri, 17 Mar 2023 13:13:05 -0700 Subject: [PATCH] Fix Google CDN not encoding UTF-8 characters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously if a job artifact were prefaced with "©ï¸", those bytes were stripped when rendering a Google CDN URL because the URL did not properly encode the characters. This commit now uses `Addressable::URI.encode_component` to do the job. `CarrierWave::Utilities::Uri#encode_path` does the same thing. Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/396945 Changelog: fixed --- app/uploaders/object_storage/cdn/google_cdn.rb | 2 +- .../uploaders/object_storage/cdn/google_cdn_spec.rb | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/app/uploaders/object_storage/cdn/google_cdn.rb b/app/uploaders/object_storage/cdn/google_cdn.rb index f1fe62e9db3f8..f39729357ed02 100644 --- a/app/uploaders/object_storage/cdn/google_cdn.rb +++ b/app/uploaders/object_storage/cdn/google_cdn.rb @@ -28,7 +28,7 @@ def signed_url(path, expiry: 10.minutes, params: {}) expiration = (Time.current + expiry).utc.to_i uri = Addressable::URI.parse(cdn_url) - uri.path = path + uri.path = Addressable::URI.encode_component(path, Addressable::URI::CharacterClasses::PATH) # Use an Array to preserve order: Google CDN needs to have # Expires, KeyName, and Signature in that order or it will return a 403 error: # https://cloud.google.com/cdn/docs/troubleshooting-steps#signing diff --git a/spec/uploaders/object_storage/cdn/google_cdn_spec.rb b/spec/uploaders/object_storage/cdn/google_cdn_spec.rb index 184c664f6dca6..96413f622e80a 100644 --- a/spec/uploaders/object_storage/cdn/google_cdn_spec.rb +++ b/spec/uploaders/object_storage/cdn/google_cdn_spec.rb @@ -99,9 +99,10 @@ let(:path) { '/path/to/file.txt' } let(:expiration) { (Time.current + 10.minutes).utc.to_i } let(:cdn_query_params) { "Expires=#{expiration}&KeyName=#{key_name}" } + let(:encoded_path) { Addressable::URI.encode_component(path, Addressable::URI::CharacterClasses::PATH) } def verify_signature(url, unsigned_url) - expect(url).to start_with("#{options[:url]}#{path}") + expect(url).to start_with("#{options[:url]}#{encoded_path}") uri = Addressable::URI.parse(url) query = uri.query_values @@ -116,6 +117,16 @@ def verify_signature(url, unsigned_url) end end + context 'with UTF-8 characters in path' do + let(:path) { "/path/to/©ï¸job🧪" } + let(:url) { subject.signed_url(path) } + let(:unsigned_url) { "#{options[:url]}#{encoded_path}?#{cdn_query_params}" } + + it 'returns a valid signed URL' do + verify_signature(url, unsigned_url) + end + end + context 'with default query parameters' do let(:url) { subject.signed_url(path) } let(:unsigned_url) { "#{options[:url]}#{path}?#{cdn_query_params}" } -- GitLab