From aeafec338a65f4ca61bf39f0f2c9cf568161ed6b Mon Sep 17 00:00:00 2001 From: Stan Hu <stanhu@gmail.com> Date: Wed, 31 Jan 2024 14:22:09 -0800 Subject: [PATCH] Reduce memory allocations when iterating over an Enumerator https://gitlab.com/gitlab-org/gitlab/-/merge_requests/88882 introduced a monkey patch for `Enumeration#next` to improve stack traces for Gitaly calls due to https://bugs.ruby-lang.org/issues/16829. This patch relies on checking that `gitlab_patch_backtrace_marker` is present in the stack trace. However, for a Rust extension like `prometheus-client-mmap`, the Ruby interpreter omits the details of the calls made inside the extension. For example, in https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/8373#note_1748497473, we see `StopIteration` is raised when the extension iterates through all strings in its `WeakMap`, but the backtrace does not contain the expected signature since `Enumerator#next` is called from the Rust code. As a result, the backtrace would always be unnecessarily appended to the `StopIteration` exception, resulting in millions of wasted memory allocations. This commit reduces memory allocations in two ways: 1. Use a constant for the regular expression. This eliminates the need to call `parse_regexp` in the Ruby interpreter repeatedly each time `Enumerator#next` is called. 2. Don't append stack traces for `StopIteration`. A Enumerator raises `StopIteration` when the end is reached, and `Kernel#loop` rescues this exception. Previously a lot of care went into ensuring that nested enums preserved the full backtrace when `StopIteration` occurred, but this generally isn't needed for debugging. If a C or Rust extension iterates through an array repeatedly, we saw millions of temporary objects being created to build the complete stack trace for no real use. Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/439920 Changelog: performance --- config/initializers/enumerator_next_patch.rb | 9 ++++- .../enumerator_next_patch_spec.rb | 35 ++++++------------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/config/initializers/enumerator_next_patch.rb b/config/initializers/enumerator_next_patch.rb index 3771864fb918..bb6f626080e1 100644 --- a/config/initializers/enumerator_next_patch.rb +++ b/config/initializers/enumerator_next_patch.rb @@ -4,9 +4,16 @@ # when an error is raised from within a Fiber. # https://bugs.ruby-lang.org/issues/16829 module EnumeratorNextPatch + GITLAB_BACKTRACE_REGEX = %r{^(.*):[0-9]+:in `gitlab_patch_backtrace_marker'$} + %w[next next_values peek peek_values].each do |name| define_method(name) do |*args| gitlab_patch_backtrace_marker { super(*args) } + # A Enumerator raises StopIteration when the end is reached and + # Kernel#loop rescues this. Ignore these exceptions since they are + # normal, and we want to avoid memory allocations when they occur. + rescue StopIteration + raise rescue Exception => err # rubocop: disable Lint/RescueException err.set_backtrace(err.backtrace + caller) unless has_gitlab_patch_backtrace_marker?(err.backtrace) && backtrace_matches_caller?(err.backtrace) @@ -27,7 +34,7 @@ def gitlab_patch_backtrace_marker # #gitlab_patch_backtrace_marker calls a block, which in turn calls #next.) If it's generated # by the Fiber that #next invokes, then it won't contain this marker. def has_gitlab_patch_backtrace_marker?(backtrace) - match = %r{^(.*):[0-9]+:in `gitlab_patch_backtrace_marker'$}.match(backtrace[2]) + match = GITLAB_BACKTRACE_REGEX.match(backtrace[2]) !!match && match[1] == __FILE__ end diff --git a/spec/initializers/enumerator_next_patch_spec.rb b/spec/initializers/enumerator_next_patch_spec.rb index bf8ab823e53a..b1d4b69cea10 100644 --- a/spec/initializers/enumerator_next_patch_spec.rb +++ b/spec/initializers/enumerator_next_patch_spec.rb @@ -2,18 +2,17 @@ require 'spec_helper' -RSpec.describe 'Enumerator#next patch fix' do +RSpec.describe 'Enumerator#next patch fix', feature_category: :shared do describe 'Enumerator' do RSpec::Matchers.define :contain_unique_method_calls_in_order do |expected| attr_reader :actual match do |actual| @actual_err = actual - regexps = expected.map { |method_name| { name: method_name, regexp: make_regexp(method_name) } } @actual = actual.backtrace.filter_map do |line| - regexp = regexps.find { |r| r[:regexp].match? line } + match = source_regexp.match(line) - regexp[:name] if regexp + match[1] if match end expected == @actual @@ -27,8 +26,8 @@ private - def make_regexp(method_name) - Regexp.new("/spec/initializers/enumerator_next_patch_spec\\.rb:[0-9]+:in `#{method_name}'$") + def source_regexp + @source_regexp ||= Regexp.new("/spec/initializers/enumerator_next_patch_spec\\.rb:[0-9]+:in `([a-zA-Z_]*)'$") end end @@ -40,6 +39,10 @@ def have_been_raised_by_enum_object_and_fixed_up contain_unique_method_calls_in_order %w[make_error call_enum_method] end + def have_been_raised_by_enum_object_and_not_fixed_up + contain_unique_method_calls_in_order %w[make_error] + end + def have_been_raised_by_nested_next_and_fixed_up contain_unique_method_calls_in_order %w[call_nested_next call_enum_method] end @@ -90,22 +93,6 @@ def call_enum_method expect(err).to have_been_raised_by_next_and_not_fixed_up end end - - context 'nested enum object' do - def call_nested_next - nested_enumerator.next - end - - let(:nested_enumerator) { Enumerator.new { |_| } } - let(:enumerator) { Enumerator.new { |yielder| yielder << call_nested_next } } - - it 'fixes up StopIteration thrown by another instance of #next' do - expect { subject }.to raise_error do |err| - expect(err).to be_a(StopIteration) - expect(err).to have_been_raised_by_nested_next_and_fixed_up - end - end - end end describe 'arguments error' do @@ -127,14 +114,14 @@ def call_enum_method let(:enumerator) { Enumerator.new { |_| raise error } } let(:error) { make_error } - it 'fixes up StopIteration' do + it 'does not fix up StopIteration' do def make_error StopIteration.new.tap { |err| err.set_backtrace(caller) } end expect { subject }.to raise_error do |err| expect(err).to be(error) - expect(err).to have_been_raised_by_enum_object_and_fixed_up + expect(err).to have_been_raised_by_enum_object_and_not_fixed_up end end -- GitLab