From d975d93cfa0e7995a8e9daa0dfbd088a461d1612 Mon Sep 17 00:00:00 2001
From: Rodrigo Tomonari <rtomonari@gitlab.com>
Date: Wed, 22 Jan 2025 03:53:20 +0000
Subject: [PATCH] Log decompressed size of compressed HTTP responses

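Patch Net::HTTP to log the decompressed size of compressed HTTP responses
when that size exceeds the threshold configured through the
GITLAB_LOG_DECOMPRESSED_RESPONSE_BYTESIZE environment variable. A value of
0 (the default) disables the logging.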
---
 config/initializers/1_settings.rb    |   1 +
 config/initializers/net_http.rb      |  23 +++++
 spec/initializers/1_settings_spec.rb |  13 +++
 spec/initializers/net_http_spec.rb   | 125 +++++++++++++++++++++++++++
 4 files changed, 162 insertions(+)
 create mode 100644 config/initializers/net_http.rb
 create mode 100644 spec/initializers/net_http_spec.rb

diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb
index 878e1be87d3d9..33e9c3dfb2fa3 100644
--- a/config/initializers/1_settings.rb
+++ b/config/initializers/1_settings.rb
@@ -248,6 +248,7 @@
 Settings.gitlab['max_request_duration_seconds'] ||= 57
 Settings.gitlab['display_initial_root_password'] = false if Settings.gitlab['display_initial_root_password'].nil?
 Settings.gitlab['weak_passwords_digest_set'] ||= YAML.safe_load(File.open(Rails.root.join('config', 'weak_password_digests.yml')), permitted_classes: [String]).to_set.freeze
+Settings.gitlab['log_decompressed_response_bytesize'] = [ENV["GITLAB_LOG_DECOMPRESSED_RESPONSE_BYTESIZE"].to_i, 0].max
 
 Gitlab.ee do
   Settings.gitlab['mirror_max_delay'] ||= 300
diff --git a/config/initializers/net_http.rb b/config/initializers/net_http.rb
new file mode 100644
index 0000000000000..3922d72ebef61
--- /dev/null
+++ b/config/initializers/net_http.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+module Net
+  class HTTPResponse
+    module FinishOverride # prepended to Inflater below, so this finish runs first
+      # rubocop:disable Gitlab/ModuleWithInstanceVariables -- This is a Monkey Patch
+      def finish
+        if Gitlab.config.gitlab.log_decompressed_response_bytesize > 0 && # 0 disables logging
+            @inflate.total_out > Gitlab.config.gitlab.log_decompressed_response_bytesize
+          Gitlab::AppJsonLogger.debug(message: 'net/http: response decompressed', size: @inflate.total_out)
+        end
+
+        super # hand over to the finish of the class this module is prepended to
+      end
+      # rubocop:enable Gitlab/ModuleWithInstanceVariables
+    end
+
+    # Log the decompressed size of responses above the configured threshold
+    class Inflater
+      prepend FinishOverride
+    end
+  end
+end
diff --git a/spec/initializers/1_settings_spec.rb b/spec/initializers/1_settings_spec.rb
index 0290cb85f4016..2f7adb47eab1b 100644
--- a/spec/initializers/1_settings_spec.rb
+++ b/spec/initializers/1_settings_spec.rb
@@ -41,4 +41,17 @@
       it { is_expected.to be(false) }
     end
   end
+
+  describe 'log_decompressed_response_bytesize' do
+    it { expect(Settings.gitlab.log_decompressed_response_bytesize).to eq(0) } # assumes the ENV var is unset
+
+    context 'when GITLAB_LOG_DECOMPRESSED_RESPONSE_BYTESIZE is set' do
+      before do
+        stub_env('GITLAB_LOG_DECOMPRESSED_RESPONSE_BYTESIZE', '10')
+        load_settings # helper defined outside this hunk; presumably re-runs the settings initializer
+      end
+
+      it { expect(Settings.gitlab.log_decompressed_response_bytesize).to eq(10) }
+    end
+  end
 end
diff --git a/spec/initializers/net_http_spec.rb b/spec/initializers/net_http_spec.rb
new file mode 100644
index 0000000000000..3a8c85e079434
--- /dev/null
+++ b/spec/initializers/net_http_spec.rb
@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe 'Net::Http patch', feature_category: :integrations do
+  def gzip_compress(content) # returns the gzip stream produced for content
+    buffer = StringIO.new
+    gzip = Zlib::GzipWriter.new(buffer)
+    gzip.write(content)
+    gzip.close # close writes the gzip trailer into buffer
+    buffer.string
+  end
+
+  def read_body(res, io) # reads the body of res from io and returns it
+    body = nil
+    res.reading_body io, true do # true: the request method allows a response body
+      body = res.read_body
+    end
+    body
+  end
+
+  shared_examples 'logging behavior for decompressed content' do |size| # size: expected :size in the log entry
+    it 'logs the decompressed content size' do
+      expect(Gitlab::AppJsonLogger).to receive(:debug).with(message: 'net/http: response decompressed', size: size)
+
+      res = Net::HTTPResponse.read_new(io) # io is supplied by the including context
+      res.decode_content = true # have Net::HTTP inflate the gzip body
+
+      read_body(res, io)
+    end
+  end
+
+  shared_examples 'no logging for decompressed content' do
+    it 'does not log the decompressed content size' do
+      expect(Gitlab::AppJsonLogger).not_to receive(:debug) # any debug call fails the example
+
+      res = Net::HTTPResponse.read_new(io) # io is supplied by the including context
+      res.decode_content = true # have Net::HTTP inflate the gzip body
+
+      read_body(res, io)
+    end
+  end
+
+  describe 'decompressing data' do
+    let(:body) { 'Hello world!' } # 12 bytes
+    let(:io) do
+      gzip_body = gzip_compress(body) # gzip bytes, interpolated below as the response body
+      response = <<~RESPONSE
+        HTTP/1.1 200 OK
+        Content-Encoding: gzip
+        Content-Type: text/plain
+
+        #{gzip_body}
+      RESPONSE
+
+      Net::BufferedIO.new(StringIO.new(response.force_encoding('ASCII-8BIT')))
+    end
+
+    context 'when decompressed content size exceeds the log threshold' do
+      before do # threshold of 11 sits just below the 12-byte body
+        allow(Gitlab.config.gitlab).to receive(:log_decompressed_response_bytesize).and_return(11)
+      end
+
+      it_behaves_like 'logging behavior for decompressed content', 12.bytes
+    end
+
+    context 'when decompressed content size is below the log threshold' do
+      before do # threshold of 13 sits just above the 12-byte body
+        allow(Gitlab.config.gitlab).to receive(:log_decompressed_response_bytesize).and_return(13)
+      end
+
+      it_behaves_like 'no logging for decompressed content'
+    end
+
+    context 'when log_decompressed_response_bytesize is set to zero' do
+      before do # zero fails the `> 0` guard in the patched finish
+        allow(Gitlab.config.gitlab).to receive(:log_decompressed_response_bytesize).and_return(0)
+      end
+
+      it_behaves_like 'no logging for decompressed content'
+    end
+
+    context 'with chunked response' do
+      let(:io) do
+        chunked_response = ""
+        chunked_response += "HTTP/1.1 200 OK\r\n"
+        chunked_response += "Content-Encoding: gzip\r\n"
+        chunked_response += "Content-Type: application/octet-stream\r\n"
+        chunked_response += "Transfer-Encoding: chunked\r\n"
+        chunked_response += "\r\n" # blank line ends the headers
+        gzipped_content = gzip_compress(body)
+        gzipped_content.each_char.each_slice(1024) do |chunk| # up to 1024 characters per chunk
+          chunk_data = chunk.join
+          chunk_size_hex = format("%X", chunk_data.bytesize) # chunk size line is hex, counted in bytes
+
+          chunked_response += chunk_size_hex
+          chunked_response += "\r\n"
+          chunked_response += chunk_data
+          chunked_response += "\r\n"
+        end
+        chunked_response << "0\r\n\r\n" # zero-size chunk terminates the body
+
+        Net::BufferedIO.new(StringIO.new(chunked_response.force_encoding('ASCII-8BIT')))
+      end
+
+      let(:body) { "A" * 2 * 1024 * 1024 } # 2 MiB of the letter A
+
+      context 'when decompressed content size exceeds the log threshold' do
+        before do # 1 MB threshold, below the 2 MB body
+          allow(Gitlab.config.gitlab).to receive(:log_decompressed_response_bytesize).and_return(1.megabyte)
+        end
+
+        it_behaves_like 'logging behavior for decompressed content', 2.megabytes
+      end
+
+      context 'when decompressed content size is below the log threshold' do
+        before do # 3 MB threshold, above the 2 MB body
+          allow(Gitlab.config.gitlab).to receive(:log_decompressed_response_bytesize).and_return(3.megabytes)
+        end
+
+        it_behaves_like 'no logging for decompressed content'
+      end
+    end
+  end
+end
-- 
GitLab