From 9da1771de45e213eeb116cf9fd41434a75efbd2d Mon Sep 17 00:00:00 2001 From: Sean McGivern <sean@gitlab.com> Date: Fri, 19 Mar 2021 17:44:13 +0000 Subject: [PATCH] Immediately unlink Rack::Multipart temporary files Rack writes files from multipart/form-data requests to disk in a temporary file. Rack includes a middleware to clean these up - Rack::TempfileReaper - but that won't withstand a process being sent SIGKILL. To handle that case, we can immediately unlink the created temporary file, which means it will be removed once we're done with it or the current process goes away. For development mode and test mode, we have to ensure that this new middleware is before Gitlab::Middleware::Static, otherwise we might not get the chance to set our own middleware. With direct upload configured, GitLab mostly doesn't accept multipart/form-data requests in a way where they reach Rack directly - they typically go via Workhorse which accelerates them - but there are cases where it can happen, and direct upload is still only an option. To test this manually, we can set `$GITLAB_API_TOKEN_LOCAL` to a personal access token for the API in the local environment, `$PATH_TO_FILE` to be a path to a (preferably large) file to be uploaded, and break the actual saving of uploads (in the default case with GDK, stop Minio): curl -H "Private-Token: $GITLAB_API_TOKEN_LOCAL" \ -F "file=@$PATH_TO_FILE" \ http://localhost:3000/api/v4/projects/1/uploads Once the upload is finished and the request fails, we'll see the file we uploaded in `$TMPDIR`: $ ls -l $TMPDIR/RackMultipart* | awk '{ print $5, $8 }' 952107008 17:40 With this change, that won't happen: we'll see the file created and immediately unlinked, so no matter what happens, it won't stick around on disk. (This specific test case is handled by Rack::TempfileReaper in later versions of Rack, but it still depends on manual cleanup.) --- config/application.rb | 3 + .../rack_multipart_tempfile_factory.rb | 25 +++++ .../rack_multipart_tempfile_factory_spec.rb | 94 +++++++++++++++++++ spec/requests/api/projects_spec.rb | 19 +++- 4 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 lib/gitlab/middleware/rack_multipart_tempfile_factory.rb create mode 100644 spec/lib/gitlab/middleware/rack_multipart_tempfile_factory_spec.rb diff --git a/config/application.rb b/config/application.rb index e5710edc8115e..03d96fdf85e0c 100644 --- a/config/application.rb +++ b/config/application.rb @@ -29,6 +29,7 @@ class Application < Rails::Application require_dependency Rails.root.join('lib/gitlab/middleware/same_site_cookies') require_dependency Rails.root.join('lib/gitlab/middleware/handle_ip_spoof_attack_error') require_dependency Rails.root.join('lib/gitlab/middleware/handle_malformed_strings') + require_dependency Rails.root.join('lib/gitlab/middleware/rack_multipart_tempfile_factory') require_dependency Rails.root.join('lib/gitlab/runtime') # Settings in config/environments/* take precedence over those specified here. @@ -271,6 +272,8 @@ class Application < Rails::Application config.middleware.insert_after ActionDispatch::ActionableExceptions, ::Gitlab::Middleware::HandleMalformedStrings + config.middleware.insert_after Rack::Sendfile, ::Gitlab::Middleware::RackMultipartTempfileFactory + # Allow access to GitLab API from other domains config.middleware.insert_before Warden::Manager, Rack::Cors do headers_to_expose = %w[Link X-Total X-Total-Pages X-Per-Page X-Page X-Next-Page X-Prev-Page X-Gitlab-Blob-Id X-Gitlab-Commit-Id X-Gitlab-Content-Sha256 X-Gitlab-Encoding X-Gitlab-File-Name X-Gitlab-File-Path X-Gitlab-Last-Commit-Id X-Gitlab-Ref X-Gitlab-Size] diff --git a/lib/gitlab/middleware/rack_multipart_tempfile_factory.rb b/lib/gitlab/middleware/rack_multipart_tempfile_factory.rb new file mode 100644 index 0000000000000..d16c068c3c0cf --- /dev/null +++ b/lib/gitlab/middleware/rack_multipart_tempfile_factory.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Gitlab + module Middleware + class RackMultipartTempfileFactory + # Immediately unlink the created temporary file so we don't have to rely + # on Rack::TempfileReaper catching this after the fact. + FACTORY = lambda do |filename, content_type| + Rack::Multipart::Parser::TEMPFILE_FACTORY.call(filename, content_type).tap(&:unlink) + end + + def initialize(app) + @app = app + end + + def call(env) + if ENV['GITLAB_TEMPFILE_IMMEDIATE_UNLINK'] == '1' + env[Rack::RACK_MULTIPART_TEMPFILE_FACTORY] = FACTORY + end + + @app.call(env) + end + end + end +end diff --git a/spec/lib/gitlab/middleware/rack_multipart_tempfile_factory_spec.rb b/spec/lib/gitlab/middleware/rack_multipart_tempfile_factory_spec.rb new file mode 100644 index 0000000000000..b9d00b556c5c3 --- /dev/null +++ b/spec/lib/gitlab/middleware/rack_multipart_tempfile_factory_spec.rb @@ -0,0 +1,94 @@ +# frozen_string_literal: true + +require 'fast_spec_helper' +require 'rack' + +RSpec.describe Gitlab::Middleware::RackMultipartTempfileFactory do + let(:app) do + lambda do |env| + params = Rack::Request.new(env).params + + if params['file'] + [200, { 'Content-Type' => params['file'][:type] }, [params['file'][:tempfile].read]] + else + [204, {}, []] + end + end + end + + let(:file_contents) { '/9j/4AAQSkZJRgABAQAAAQABAAD//gA+Q1JFQVRPUjogZ2QtanBlZyB2MS4wICh1c2luZyBJSkcg' } + + let(:multipart_fixture) do + boundary = 'AaB03x' + data = <<~DATA + --#{boundary}\r + Content-Disposition: form-data; name="file"; filename="dj.jpg"\r + Content-Type: image/jpeg\r + Content-Transfer-Encoding: base64\r + \r + #{file_contents}\r + --#{boundary}--\r + DATA + + { + 'CONTENT_TYPE' => "multipart/form-data; boundary=#{boundary}", + 'CONTENT_LENGTH' => data.bytesize.to_s, + input: StringIO.new(data) + } + end + + subject { described_class.new(app) } + + context 'for a multipart request' do + let(:env) { Rack::MockRequest.env_for('/', multipart_fixture) } + + context 'when the environment variable is enabled' do + before do + stub_env('GITLAB_TEMPFILE_IMMEDIATE_UNLINK', '1') + end + + it 'immediately unlinks the temporary file' do + tempfile = Tempfile.new('foo') + + expect(tempfile.path).not_to be(nil) + expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile) + expect(tempfile).to receive(:unlink).and_call_original + + subject.call(env) + + expect(tempfile.path).to be(nil) + end + + it 'processes the request as normal' do + expect(subject.call(env)).to eq([200, { 'Content-Type' => 'image/jpeg' }, [file_contents]]) + end + end + + context 'when the environment variable is disabled' do + it 'does not immediately unlink the temporary file' do + tempfile = Tempfile.new('foo') + + expect(tempfile.path).not_to be(nil) + expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile) + expect(tempfile).not_to receive(:unlink).and_call_original + + subject.call(env) + + expect(tempfile.path).not_to be(nil) + end + + it 'processes the request as normal' do + expect(subject.call(env)).to eq([200, { 'Content-Type' => 'image/jpeg' }, [file_contents]]) + end + end + end + + context 'for a regular request' do + let(:env) { Rack::MockRequest.env_for('/', params: { 'foo' => 'bar' }) } + + it 'does nothing' do + expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).not_to receive(:call) + expect(subject.call(env)).to eq([204, {}, []]) + end + end +end diff --git a/spec/requests/api/projects_spec.rb b/spec/requests/api/projects_spec.rb index 00b732b4a361b..d4fb49a8f5031 100644 --- a/spec/requests/api/projects_spec.rb +++ b/spec/requests/api/projects_spec.rb @@ -1584,7 +1584,6 @@ expect(json_response['alt']).to eq("dk") expect(json_response['url']).to start_with("/uploads/") expect(json_response['url']).to end_with("/dk.png") - expect(json_response['full_path']).to start_with("/#{project.namespace.path}/#{project.path}/uploads") end @@ -1596,6 +1595,24 @@ post api("/projects/#{project.id}/uploads", user), params: { file: file } end + it "does not leave the temporary file in place after uploading, even when the tempfile reaper does not run" do + stub_env('GITLAB_TEMPFILE_IMMEDIATE_UNLINK', '1') + tempfile = Tempfile.new('foo') + path = tempfile.path + + allow_any_instance_of(Rack::TempfileReaper).to receive(:call) do |instance, env| + instance.instance_variable_get(:@app).call(env) + end + + expect(path).not_to be(nil) + expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile) + + post api("/projects/#{project.id}/uploads", user), params: { file: fixture_file_upload("spec/fixtures/dk.png", "image/png") } + + expect(tempfile.path).to be(nil) + expect(File.exist?(path)).to be(false) + end + shared_examples 'capped upload attachments' do it "limits the upload to 1 GB" do expect_next_instance_of(UploadService) do |instance| -- GitLab