From 9da1771de45e213eeb116cf9fd41434a75efbd2d Mon Sep 17 00:00:00 2001
From: Sean McGivern <sean@gitlab.com>
Date: Fri, 19 Mar 2021 17:44:13 +0000
Subject: [PATCH] Immediately unlink Rack::Multipart temporary files

Rack writes files from multipart/form-data requests to disk in a
temporary file. Rack includes a middleware to clean these up -
Rack::TempfileReaper - but that won't withstand a process being sent
SIGKILL. To handle that case, we can immediately unlink the created
temporary file, which means it will be removed once we're done with it
or the current process goes away.

For development mode and test mode, we have to ensure that this new
middleware is before Gitlab::Middleware::Static, otherwise we might not
get the chance to set our own middleware.

With direct upload configured, GitLab mostly doesn't accept
multipart/form-data requests in a way where they reach Rack directly -
they typically go via Workhorse which accelerates them - but there are
cases where it can happen, and direct upload is still only an option.

To test this manually, we can set `$GITLAB_API_TOKEN_LOCAL` to a
personal access token for the API in the local environment,
`$PATH_TO_FILE` to be a path to a (preferably large) file to be
uploaded, and break the actual saving of uploads (in the default case
with GDK, stop Minio):

    curl -H "Private-Token: $GITLAB_API_TOKEN_LOCAL" \
      -F "file=@$PATH_TO_FILE" \
      http://localhost:3000/api/v4/projects/1/uploads

Once the upload is finished and the request fails, we'll see the file we
uploaded in `$TMPDIR`:

    $ ls -l $TMPDIR/RackMultipart* | awk '{ print $5, $8 }'
    952107008 17:40

With this change, that won't happen: we'll see the file created and
immediately unlinked, so no matter what happens, it won't stick around
on disk. (This specific test case is handled by Rack::TempfileReaper in
later versions of Rack, but it still depends on manual cleanup.)
---
 config/application.rb                         |  3 +
 .../rack_multipart_tempfile_factory.rb        | 25 +++++
 .../rack_multipart_tempfile_factory_spec.rb   | 94 +++++++++++++++++++
 spec/requests/api/projects_spec.rb            | 19 +++-
 4 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100644 lib/gitlab/middleware/rack_multipart_tempfile_factory.rb
 create mode 100644 spec/lib/gitlab/middleware/rack_multipart_tempfile_factory_spec.rb

diff --git a/config/application.rb b/config/application.rb
index e5710edc8115e..03d96fdf85e0c 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -29,6 +29,7 @@ class Application < Rails::Application
     require_dependency Rails.root.join('lib/gitlab/middleware/same_site_cookies')
     require_dependency Rails.root.join('lib/gitlab/middleware/handle_ip_spoof_attack_error')
     require_dependency Rails.root.join('lib/gitlab/middleware/handle_malformed_strings')
+    require_dependency Rails.root.join('lib/gitlab/middleware/rack_multipart_tempfile_factory')
     require_dependency Rails.root.join('lib/gitlab/runtime')
 
     # Settings in config/environments/* take precedence over those specified here.
@@ -271,6 +272,8 @@ class Application < Rails::Application
 
     config.middleware.insert_after ActionDispatch::ActionableExceptions, ::Gitlab::Middleware::HandleMalformedStrings
 
+    config.middleware.insert_after Rack::Sendfile, ::Gitlab::Middleware::RackMultipartTempfileFactory
+
     # Allow access to GitLab API from other domains
     config.middleware.insert_before Warden::Manager, Rack::Cors do
       headers_to_expose = %w[Link X-Total X-Total-Pages X-Per-Page X-Page X-Next-Page X-Prev-Page X-Gitlab-Blob-Id X-Gitlab-Commit-Id X-Gitlab-Content-Sha256 X-Gitlab-Encoding X-Gitlab-File-Name X-Gitlab-File-Path X-Gitlab-Last-Commit-Id X-Gitlab-Ref X-Gitlab-Size]
diff --git a/lib/gitlab/middleware/rack_multipart_tempfile_factory.rb b/lib/gitlab/middleware/rack_multipart_tempfile_factory.rb
new file mode 100644
index 0000000000000..d16c068c3c0cf
--- /dev/null
+++ b/lib/gitlab/middleware/rack_multipart_tempfile_factory.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Middleware
+    class RackMultipartTempfileFactory
+      # Immediately unlink the created temporary file so we don't have to rely
+      # on Rack::TempfileReaper catching this after the fact.
+      FACTORY = lambda do |filename, content_type|
+        Rack::Multipart::Parser::TEMPFILE_FACTORY.call(filename, content_type).tap(&:unlink)
+      end
+
+      def initialize(app)
+        @app = app
+      end
+
+      def call(env)
+        if ENV['GITLAB_TEMPFILE_IMMEDIATE_UNLINK'] == '1'
+          env[Rack::RACK_MULTIPART_TEMPFILE_FACTORY] = FACTORY
+        end
+
+        @app.call(env)
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/middleware/rack_multipart_tempfile_factory_spec.rb b/spec/lib/gitlab/middleware/rack_multipart_tempfile_factory_spec.rb
new file mode 100644
index 0000000000000..b9d00b556c5c3
--- /dev/null
+++ b/spec/lib/gitlab/middleware/rack_multipart_tempfile_factory_spec.rb
@@ -0,0 +1,94 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+require 'rack'
+
+RSpec.describe Gitlab::Middleware::RackMultipartTempfileFactory do
+  let(:app) do
+    lambda do |env|
+      params = Rack::Request.new(env).params
+
+      if params['file']
+        [200, { 'Content-Type' => params['file'][:type] }, [params['file'][:tempfile].read]]
+      else
+        [204, {}, []]
+      end
+    end
+  end
+
+  let(:file_contents) { '/9j/4AAQSkZJRgABAQAAAQABAAD//gA+Q1JFQVRPUjogZ2QtanBlZyB2MS4wICh1c2luZyBJSkcg' }
+
+  let(:multipart_fixture) do
+    boundary = 'AaB03x'
+    data = <<~DATA
+      --#{boundary}\r
+      Content-Disposition: form-data; name="file"; filename="dj.jpg"\r
+      Content-Type: image/jpeg\r
+      Content-Transfer-Encoding: base64\r
+      \r
+      #{file_contents}\r
+      --#{boundary}--\r
+    DATA
+
+    {
+      'CONTENT_TYPE' => "multipart/form-data; boundary=#{boundary}",
+      'CONTENT_LENGTH' => data.bytesize.to_s,
+      input: StringIO.new(data)
+    }
+  end
+
+  subject { described_class.new(app) }
+
+  context 'for a multipart request' do
+    let(:env) { Rack::MockRequest.env_for('/', multipart_fixture) }
+
+    context 'when the environment variable is enabled' do
+      before do
+        stub_env('GITLAB_TEMPFILE_IMMEDIATE_UNLINK', '1')
+      end
+
+      it 'immediately unlinks the temporary file' do
+        tempfile = Tempfile.new('foo')
+
+        expect(tempfile.path).not_to be(nil)
+        expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile)
+        expect(tempfile).to receive(:unlink).and_call_original
+
+        subject.call(env)
+
+        expect(tempfile.path).to be(nil)
+      end
+
+      it 'processes the request as normal' do
+        expect(subject.call(env)).to eq([200, { 'Content-Type' => 'image/jpeg' }, [file_contents]])
+      end
+    end
+
+    context 'when the environment variable is disabled' do
+      it 'does not immediately unlink the temporary file' do
+        tempfile = Tempfile.new('foo')
+
+        expect(tempfile.path).not_to be(nil)
+        expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile)
+        expect(tempfile).not_to receive(:unlink).and_call_original
+
+        subject.call(env)
+
+        expect(tempfile.path).not_to be(nil)
+      end
+
+      it 'processes the request as normal' do
+        expect(subject.call(env)).to eq([200, { 'Content-Type' => 'image/jpeg' }, [file_contents]])
+      end
+    end
+  end
+
+  context 'for a regular request' do
+    let(:env) { Rack::MockRequest.env_for('/', params: { 'foo' => 'bar' }) }
+
+    it 'does nothing' do
+      expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).not_to receive(:call)
+      expect(subject.call(env)).to eq([204, {}, []])
+    end
+  end
+end
diff --git a/spec/requests/api/projects_spec.rb b/spec/requests/api/projects_spec.rb
index 00b732b4a361b..d4fb49a8f5031 100644
--- a/spec/requests/api/projects_spec.rb
+++ b/spec/requests/api/projects_spec.rb
@@ -1584,7 +1584,6 @@
       expect(json_response['alt']).to eq("dk")
       expect(json_response['url']).to start_with("/uploads/")
       expect(json_response['url']).to end_with("/dk.png")
-
       expect(json_response['full_path']).to start_with("/#{project.namespace.path}/#{project.path}/uploads")
     end
 
@@ -1596,6 +1595,24 @@
       post api("/projects/#{project.id}/uploads", user), params: { file: file }
     end
 
+    it "does not leave the temporary file in place after uploading, even when the tempfile reaper does not run" do
+      stub_env('GITLAB_TEMPFILE_IMMEDIATE_UNLINK', '1')
+      tempfile = Tempfile.new('foo')
+      path = tempfile.path
+
+      allow_any_instance_of(Rack::TempfileReaper).to receive(:call) do |instance, env|
+        instance.instance_variable_get(:@app).call(env)
+      end
+
+      expect(path).not_to be(nil)
+      expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile)
+
+      post api("/projects/#{project.id}/uploads", user), params: { file: fixture_file_upload("spec/fixtures/dk.png", "image/png") }
+
+      expect(tempfile.path).to be(nil)
+      expect(File.exist?(path)).to be(false)
+    end
+
     shared_examples 'capped upload attachments' do
       it "limits the upload to 1 GB" do
         expect_next_instance_of(UploadService) do |instance|
-- 
GitLab