From aeafec338a65f4ca61bf39f0f2c9cf568161ed6b Mon Sep 17 00:00:00 2001
From: Stan Hu <stanhu@gmail.com>
Date: Wed, 31 Jan 2024 14:22:09 -0800
Subject: [PATCH] Reduce memory allocations when iterating over an Enumerator

https://gitlab.com/gitlab-org/gitlab/-/merge_requests/88882 introduced
a monkey patch for `Enumeration#next` to improve stack traces for
Gitaly calls due to https://bugs.ruby-lang.org/issues/16829.

This patch relies on checking that `gitlab_patch_backtrace_marker` is
present in the stack trace. However, for a Rust extension like
`prometheus-client-mmap`, the Ruby interpreter omits the details of
the calls made inside the extension. For example, in
https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/8373#note_1748497473,
we see `StopIteration` is raised when the extension iterates through
all strings in its `WeakMap`, but the backtrace does not contain the
expected signature since `Enumerator#next` is called from the Rust
code. As a result, the backtrace would always be unnecessarily
appended to the `StopIteration` exception, resulting in millions of
wasted memory allocations.

This commit reduces memory allocations in two ways:

1. Use a constant for the regular expression. This eliminates the need
to call `parse_regexp` in the Ruby interpreter repeatedly each time
`Enumerator#next` is called.

2. Don't append stack traces for `StopIteration`. A Enumerator raises
`StopIteration` when the end is reached, and `Kernel#loop` rescues this
exception. Previously a lot of care went into ensuring that nested
enums preserved the full backtrace when `StopIteration` occurred, but
this generally isn't needed for debugging. If a C or Rust extension
iterates through an array repeatedly, we saw millions of temporary
objects being created to build the complete stack trace for no real
use.

Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/439920

Changelog: performance
---
 config/initializers/enumerator_next_patch.rb  |  9 ++++-
 .../enumerator_next_patch_spec.rb             | 35 ++++++-------------
 2 files changed, 19 insertions(+), 25 deletions(-)

diff --git a/config/initializers/enumerator_next_patch.rb b/config/initializers/enumerator_next_patch.rb
index 3771864fb918..bb6f626080e1 100644
--- a/config/initializers/enumerator_next_patch.rb
+++ b/config/initializers/enumerator_next_patch.rb
@@ -4,9 +4,16 @@
 # when an error is raised from within a Fiber.
 # https://bugs.ruby-lang.org/issues/16829
 module EnumeratorNextPatch
+  GITLAB_BACKTRACE_REGEX = %r{^(.*):[0-9]+:in `gitlab_patch_backtrace_marker'$}
+
   %w[next next_values peek peek_values].each do |name|
     define_method(name) do |*args|
       gitlab_patch_backtrace_marker { super(*args) }
+    # A Enumerator raises StopIteration when the end is reached and
+    # Kernel#loop rescues this. Ignore these exceptions since they are
+    # normal, and we want to avoid memory allocations when they occur.
+    rescue StopIteration
+      raise
     rescue Exception => err # rubocop: disable Lint/RescueException
       err.set_backtrace(err.backtrace + caller) unless
         has_gitlab_patch_backtrace_marker?(err.backtrace) && backtrace_matches_caller?(err.backtrace)
@@ -27,7 +34,7 @@ def gitlab_patch_backtrace_marker
   # #gitlab_patch_backtrace_marker calls a block, which in turn calls #next.) If it's generated
   # by the Fiber that #next invokes, then it won't contain this marker.
   def has_gitlab_patch_backtrace_marker?(backtrace)
-    match = %r{^(.*):[0-9]+:in `gitlab_patch_backtrace_marker'$}.match(backtrace[2])
+    match = GITLAB_BACKTRACE_REGEX.match(backtrace[2])
 
     !!match && match[1] == __FILE__
   end
diff --git a/spec/initializers/enumerator_next_patch_spec.rb b/spec/initializers/enumerator_next_patch_spec.rb
index bf8ab823e53a..b1d4b69cea10 100644
--- a/spec/initializers/enumerator_next_patch_spec.rb
+++ b/spec/initializers/enumerator_next_patch_spec.rb
@@ -2,18 +2,17 @@
 
 require 'spec_helper'
 
-RSpec.describe 'Enumerator#next patch fix' do
+RSpec.describe 'Enumerator#next patch fix', feature_category: :shared do
   describe 'Enumerator' do
     RSpec::Matchers.define :contain_unique_method_calls_in_order do |expected|
       attr_reader :actual
 
       match do |actual|
         @actual_err = actual
-        regexps = expected.map { |method_name| { name: method_name, regexp: make_regexp(method_name) } }
         @actual = actual.backtrace.filter_map do |line|
-          regexp = regexps.find { |r| r[:regexp].match? line }
+          match = source_regexp.match(line)
 
-          regexp[:name] if regexp
+          match[1] if match
         end
 
         expected == @actual
@@ -27,8 +26,8 @@
 
       private
 
-      def make_regexp(method_name)
-        Regexp.new("/spec/initializers/enumerator_next_patch_spec\\.rb:[0-9]+:in `#{method_name}'$")
+      def source_regexp
+        @source_regexp ||= Regexp.new("/spec/initializers/enumerator_next_patch_spec\\.rb:[0-9]+:in `([a-zA-Z_]*)'$")
       end
     end
 
@@ -40,6 +39,10 @@ def have_been_raised_by_enum_object_and_fixed_up
       contain_unique_method_calls_in_order %w[make_error call_enum_method]
     end
 
+    def have_been_raised_by_enum_object_and_not_fixed_up
+      contain_unique_method_calls_in_order %w[make_error]
+    end
+
     def have_been_raised_by_nested_next_and_fixed_up
       contain_unique_method_calls_in_order %w[call_nested_next call_enum_method]
     end
@@ -90,22 +93,6 @@ def call_enum_method
               expect(err).to have_been_raised_by_next_and_not_fixed_up
             end
           end
-
-          context 'nested enum object' do
-            def call_nested_next
-              nested_enumerator.next
-            end
-
-            let(:nested_enumerator) { Enumerator.new { |_| } }
-            let(:enumerator) { Enumerator.new { |yielder| yielder << call_nested_next } }
-
-            it 'fixes up StopIteration thrown by another instance of #next' do
-              expect { subject }.to raise_error do |err|
-                expect(err).to be_a(StopIteration)
-                expect(err).to have_been_raised_by_nested_next_and_fixed_up
-              end
-            end
-          end
         end
 
         describe 'arguments error' do
@@ -127,14 +114,14 @@ def call_enum_method
           let(:enumerator) { Enumerator.new { |_| raise error } }
           let(:error) { make_error }
 
-          it 'fixes up StopIteration' do
+          it 'does not fix up StopIteration' do
             def make_error
               StopIteration.new.tap { |err| err.set_backtrace(caller) }
             end
 
             expect { subject }.to raise_error do |err|
               expect(err).to be(error)
-              expect(err).to have_been_raised_by_enum_object_and_fixed_up
+              expect(err).to have_been_raised_by_enum_object_and_not_fixed_up
             end
           end
 
-- 
GitLab