From fb77e461697b83cfe2fd535f984f8d3930a67742 Mon Sep 17 00:00:00 2001
From: Mario de la Ossa <mariodelaossa@gmail.com>
Date: Tue, 20 Oct 2020 12:47:28 -0600
Subject: [PATCH] Expand HandleNullBytes to handle malformed strings

Expands the HandleNullBytes middleware to also handle malformed strings.
Changes the middleware to return ActionController::BadRequest so we use
the default Rails bad request handling.
---
 .../unreleased/257822-null-byes-in-url.yml    |   5 +
 config/application.rb                         |   4 +-
 ...l_bytes.rb => handle_malformed_strings.rb} |  19 +--
 .../handle_malformed_strings_spec.rb          | 109 ++++++++++++++++++
 .../middleware/handle_null_bytes_spec.rb      |  88 --------------
 .../user_sends_malformed_strings_spec.rb      |  20 ++++
 spec/requests/user_sends_null_bytes_spec.rb   |  14 ---
 7 files changed, 148 insertions(+), 111 deletions(-)
 create mode 100644 changelogs/unreleased/257822-null-byes-in-url.yml
 rename lib/gitlab/middleware/{handle_null_bytes.rb => handle_malformed_strings.rb} (67%)
 create mode 100644 spec/lib/gitlab/middleware/handle_malformed_strings_spec.rb
 delete mode 100644 spec/lib/gitlab/middleware/handle_null_bytes_spec.rb
 create mode 100644 spec/requests/user_sends_malformed_strings_spec.rb
 delete mode 100644 spec/requests/user_sends_null_bytes_spec.rb

diff --git a/changelogs/unreleased/257822-null-byes-in-url.yml b/changelogs/unreleased/257822-null-byes-in-url.yml
new file mode 100644
index 0000000000000..6d9ef42929a36
--- /dev/null
+++ b/changelogs/unreleased/257822-null-byes-in-url.yml
@@ -0,0 +1,5 @@
+---
+title: Handle malformed strings in URL
+merge_request: 45701
+author:
+type: fixed
diff --git a/config/application.rb b/config/application.rb
index 75befc8a2482e..9046786acea54 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -28,7 +28,7 @@ class Application < Rails::Application
     require_dependency Rails.root.join('lib/gitlab/middleware/basic_health_check')
     require_dependency Rails.root.join('lib/gitlab/middleware/same_site_cookies')
     require_dependency Rails.root.join('lib/gitlab/middleware/handle_ip_spoof_attack_error')
-    require_dependency Rails.root.join('lib/gitlab/middleware/handle_null_bytes')
+    require_dependency Rails.root.join('lib/gitlab/middleware/handle_malformed_strings')
     require_dependency Rails.root.join('lib/gitlab/runtime')
 
     # Settings in config/environments/* take precedence over those specified here.
@@ -254,7 +254,7 @@ class Application < Rails::Application
 
     config.middleware.insert_before ActionDispatch::RemoteIp, ::Gitlab::Middleware::HandleIpSpoofAttackError
 
-    config.middleware.use ::Gitlab::Middleware::HandleNullBytes
+    config.middleware.insert_after ActionDispatch::ActionableExceptions, ::Gitlab::Middleware::HandleMalformedStrings
 
     # Allow access to GitLab API from other domains
     config.middleware.insert_before Warden::Manager, Rack::Cors do
diff --git a/lib/gitlab/middleware/handle_null_bytes.rb b/lib/gitlab/middleware/handle_malformed_strings.rb
similarity index 67%
rename from lib/gitlab/middleware/handle_null_bytes.rb
rename to lib/gitlab/middleware/handle_malformed_strings.rb
index c88dfb6ee0bdd..5fe3e6a1c7398 100644
--- a/lib/gitlab/middleware/handle_null_bytes.rb
+++ b/lib/gitlab/middleware/handle_malformed_strings.rb
@@ -2,9 +2,9 @@
 
 module Gitlab
   module Middleware
-    # There is no valid reason for a request to contain a null byte (U+0000)
+    # There is no valid reason for a request to contain a malformed string
     # so just return HTTP 400 (Bad Request) if we receive one
-    class HandleNullBytes
+    class HandleMalformedStrings
       NULL_BYTE_REGEX = Regexp.new(Regexp.escape("\u0000")).freeze
 
       attr_reader :app
@@ -14,18 +14,20 @@ def initialize(app)
       end
 
       def call(env)
-        return [400, {}, ["Bad Request"]] if request_has_null_byte?(env)
+        return [400, { 'Content-Type' => 'text/plain' }, ['Bad Request']] if request_contains_malformed_string?(env)
 
         app.call(env)
       end
 
       private
 
-      def request_has_null_byte?(request)
-        return false if ENV['REJECT_NULL_BYTES'] == "1"
+      def request_contains_malformed_string?(request)
+        return false if ENV['DISABLE_REQUEST_VALIDATION'] == '1'
 
         request = Rack::Request.new(request)
 
+        return true if string_malformed?(request.path)
+
         request.params.values.any? do |value|
           param_has_null_byte?(value)
         end
@@ -39,7 +41,7 @@ def param_has_null_byte?(value, depth = 0)
         depth += 1
 
         if value.respond_to?(:match)
-          string_contains_null_byte?(value)
+          string_malformed?(value)
         elsif value.respond_to?(:values)
           value.values.any? do |hash_value|
             param_has_null_byte?(hash_value, depth)
@@ -53,8 +55,11 @@ def param_has_null_byte?(value, depth = 0)
         end
       end
 
-      def string_contains_null_byte?(string)
+      def string_malformed?(string)
         string.match?(NULL_BYTE_REGEX)
+      rescue ArgumentError
+        # If we're here, we caught a malformed string. Return true
+        true
       end
     end
   end
diff --git a/spec/lib/gitlab/middleware/handle_malformed_strings_spec.rb b/spec/lib/gitlab/middleware/handle_malformed_strings_spec.rb
new file mode 100644
index 0000000000000..6680720e403f0
--- /dev/null
+++ b/spec/lib/gitlab/middleware/handle_malformed_strings_spec.rb
@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require "rack/test"
+
+RSpec.describe Gitlab::Middleware::HandleMalformedStrings do
+  let(:null_byte) { "\u0000" }
+  let(:invalid_string) { "mal\xC0formed" }
+  let(:error_400) { [400, { 'Content-Type' => 'text/plain' }, ['Bad Request']] }
+  let(:app) { double(:app) }
+
+  subject { described_class.new(app) }
+
+  before do
+    allow(app).to receive(:call) do |args|
+      args
+    end
+  end
+
+  def env_for(params = {})
+    Rack::MockRequest.env_for('/', { params: params })
+  end
+
+  context 'in the URL' do
+    it 'rejects null bytes' do
+      # We have to create the env separately or Rack::MockRequest complains about invalid URI
+      env = env_for
+      env['PATH_INFO'] = "/someplace/witha#{null_byte}nullbyte"
+
+      expect(subject.call(env)).to eq error_400
+    end
+
+    it 'rejects malformed strings' do
+      # We have to create the env separately or Rack::MockRequest complains about invalid URI
+      env = env_for
+      env['PATH_INFO'] = "/someplace/with_an/#{invalid_string}"
+
+      expect(subject.call(env)).to eq error_400
+    end
+  end
+
+  context 'in params' do
+    shared_examples_for 'checks params' do
+      it 'rejects bad params in a top level param' do
+        env = env_for(name: "null#{problematic_input}byte")
+
+        expect(subject.call(env)).to eq error_400
+      end
+
+      it "rejects bad params for hashes with strings" do
+        env = env_for(name: { inner_key: "I am #{problematic_input} bad" })
+
+        expect(subject.call(env)).to eq error_400
+      end
+
+      it "rejects bad params for arrays with strings" do
+        env = env_for(name: ["I am #{problematic_input} bad"])
+
+        expect(subject.call(env)).to eq error_400
+      end
+
+      it "rejects bad params for arrays containing hashes with string values" do
+        env = env_for(name: [
+          {
+            inner_key: "I am #{problematic_input} bad"
+          }
+        ])
+
+        expect(subject.call(env)).to eq error_400
+      end
+
+      it "gives up and does not reject too deeply nested params" do
+        env = env_for(name: [
+          {
+            inner_key: { deeper_key: [{ hash_inside_array_key: "I am #{problematic_input} bad" }] }
+          }
+        ])
+
+        expect(subject.call(env)).not_to eq error_400
+      end
+    end
+
+    context 'with null byte' do
+      it_behaves_like 'checks params' do
+        let(:problematic_input) { null_byte }
+      end
+    end
+
+    context 'with malformed strings' do
+      it_behaves_like 'checks params' do
+        let(:problematic_input) { invalid_string }
+      end
+    end
+  end
+
+  context 'without problematic input' do
+    it "does not error for strings" do
+      env = env_for(name: "safe name")
+
+      expect(subject.call(env)).not_to eq error_400
+    end
+
+    it "does not error with no params" do
+      env = env_for
+
+      expect(subject.call(env)).not_to eq error_400
+    end
+  end
+end
diff --git a/spec/lib/gitlab/middleware/handle_null_bytes_spec.rb b/spec/lib/gitlab/middleware/handle_null_bytes_spec.rb
deleted file mode 100644
index 76a5174817e27..0000000000000
--- a/spec/lib/gitlab/middleware/handle_null_bytes_spec.rb
+++ /dev/null
@@ -1,88 +0,0 @@
-# frozen_string_literal: true
-
-require 'spec_helper'
-require "rack/test"
-
-RSpec.describe Gitlab::Middleware::HandleNullBytes do
-  let(:null_byte) { "\u0000" }
-  let(:error_400) { [400, {}, ["Bad Request"]] }
-  let(:app) { double(:app) }
-
-  subject { described_class.new(app) }
-
-  before do
-    allow(app).to receive(:call) do |args|
-      args
-    end
-  end
-
-  def env_for(params = {})
-    Rack::MockRequest.env_for('/', { params: params })
-  end
-
-  context 'with null bytes in params' do
-    it 'rejects null bytes in a top level param' do
-      env = env_for(name: "null#{null_byte}byte")
-
-      expect(subject.call(env)).to eq error_400
-    end
-
-    it "responds with 400 BadRequest for hashes with strings" do
-      env = env_for(name: { inner_key: "I am #{null_byte} bad" })
-
-      expect(subject.call(env)).to eq error_400
-    end
-
-    it "responds with 400 BadRequest for arrays with strings" do
-      env = env_for(name: ["I am #{null_byte} bad"])
-
-      expect(subject.call(env)).to eq error_400
-    end
-
-    it "responds with 400 BadRequest for arrays containing hashes with string values" do
-      env = env_for(name: [
-        {
-          inner_key: "I am #{null_byte} bad"
-        }
-      ])
-
-      expect(subject.call(env)).to eq error_400
-    end
-
-    it "gives up and does not 400 with too deeply nested params" do
-      env = env_for(name: [
-        {
-          inner_key: { deeper_key: [{ hash_inside_array_key: "I am #{null_byte} bad" }] }
-        }
-      ])
-
-      expect(subject.call(env)).not_to eq error_400
-    end
-  end
-
-  context 'without null bytes in params' do
-    it "does not respond with a 400 for strings" do
-      env = env_for(name: "safe name")
-
-      expect(subject.call(env)).not_to eq error_400
-    end
-
-    it "does not respond with a 400 with no params" do
-      env = env_for
-
-      expect(subject.call(env)).not_to eq error_400
-    end
-  end
-
-  context 'when disabled via env flag' do
-    before do
-      stub_env('REJECT_NULL_BYTES', '1')
-    end
-
-    it 'does not respond with a 400 no matter what' do
-      env = env_for(name: "null#{null_byte}byte")
-
-      expect(subject.call(env)).not_to eq error_400
-    end
-  end
-end
diff --git a/spec/requests/user_sends_malformed_strings_spec.rb b/spec/requests/user_sends_malformed_strings_spec.rb
new file mode 100644
index 0000000000000..b6eda9159bc00
--- /dev/null
+++ b/spec/requests/user_sends_malformed_strings_spec.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe 'User sends malformed strings as params' do
+  let(:null_byte) { "\u0000" }
+  let(:invalid_string) { "mal\xC0formed" }
+
+  it 'raises a 400 error with a null byte' do
+    post '/nonexistent', params: { a: "A #{null_byte} nasty string" }
+
+    expect(response).to have_gitlab_http_status(:bad_request)
+  end
+
+  it 'raises a 400 error with an invalid string' do
+    post '/nonexistent', params: { a: "A #{invalid_string} nasty string" }
+
+    expect(response).to have_gitlab_http_status(:bad_request)
+  end
+end
diff --git a/spec/requests/user_sends_null_bytes_spec.rb b/spec/requests/user_sends_null_bytes_spec.rb
deleted file mode 100644
index 1ddfad409962d..0000000000000
--- a/spec/requests/user_sends_null_bytes_spec.rb
+++ /dev/null
@@ -1,14 +0,0 @@
-# frozen_string_literal: true
-
-require 'spec_helper'
-
-RSpec.describe 'User sends null bytes as params' do
-  let(:null_byte) { "\u0000" }
-
-  it 'raises a 400 error' do
-    post '/nonexistent', params: { a: "A #{null_byte} nasty string" }
-
-    expect(response).to have_gitlab_http_status(:bad_request)
-    expect(response.body).to eq('Bad Request')
-  end
-end
-- 
GitLab