diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md
index 4f8fbd0c07e227cc9936b8657a8cb4677317f0c2..b19b342c665b4436e435b813dae5b98fc969ded5 100644
--- a/doc/administration/monitoring/prometheus/gitlab_metrics.md
+++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md
@@ -347,6 +347,7 @@ Some basic Ruby runtime metrics are available:
 |:---------------------------------------- |:--------- |:----- |:----------- |
 | `ruby_gc_duration_seconds` | Counter | 11.1 | Time spent by Ruby in GC |
 | `ruby_gc_stat_...` | Gauge | 11.1 | Various metrics from [GC.stat](https://ruby-doc.org/core-2.6.5/GC.html#method-c-stat) |
+| `ruby_gc_stat_ext_heap_fragmentation` | Gauge | 15.2 | Degree of Ruby heap fragmentation as live objects versus eden slots (range 0 to 1) |
 | `ruby_file_descriptors` | Gauge | 11.1 | File descriptors per process |
 | `ruby_sampler_duration_seconds` | Counter | 11.1 | Time spent collecting stats |
 | `ruby_process_cpu_seconds_total` | Gauge | 12.0 | Total amount of CPU time per process |
diff --git a/lib/gitlab/metrics/memory.rb b/lib/gitlab/metrics/memory.rb
new file mode 100644
index 0000000000000000000000000000000000000000..c165cdec7a3ddb9b17bce0e6f7a35f1889d2d6eb
--- /dev/null
+++ b/lib/gitlab/metrics/memory.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Metrics
+    module Memory
+      extend self
+
+      HEAP_SLOTS_PER_PAGE = GC::INTERNAL_CONSTANTS[:HEAP_PAGE_OBJ_LIMIT]
+
+      # Computes 1 minus the ratio of live slots to the slot capacity of all
+      # eden pages (HEAP_SLOTS_PER_PAGE * heap_eden_pages): 0.0 when every slot
+      # holds a live object, approaching 1.0 as pages become emptier.
+      #
+      # gc_stat - Hash providing :heap_live_slots and :heap_eden_pages; defaults
+      #           to a fresh GC.stat snapshot.
+      #
+      # Returns a Float.
+      def gc_heap_fragmentation(gc_stat = GC.stat)
+        eden_pages = gc_stat[:heap_eden_pages].to_f
+        # With zero eden pages the division below would yield NaN or -Infinity,
+        # which is not a meaningful gauge value; report no fragmentation instead.
+        return 0.0 if eden_pages.zero?
+
+        1 - (gc_stat[:heap_live_slots] / (HEAP_SLOTS_PER_PAGE * eden_pages))
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb
index 4a3ef3711a508e0f59db7449a6555b6f1e513cb3..8e0022933471c2996197402869b6c4e9226c017d 100644
--- a/lib/gitlab/metrics/samplers/ruby_sampler.rb
+++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb
@@ -39,7 +39,8 @@ def init_metrics
             process_proportional_memory_bytes: ::Gitlab::Metrics.gauge(metric_name(:process, :proportional_memory_bytes), 'Memory used (PSS)', labels),
             process_start_time_seconds: ::Gitlab::Metrics.gauge(metric_name(:process, :start_time_seconds), 'Process start time seconds'),
             sampler_duration: ::Gitlab::Metrics.counter(metric_name(:sampler, :duration_seconds_total), 'Sampler time', labels),
-            gc_duration_seconds: ::Gitlab::Metrics.histogram(metric_name(:gc, :duration_seconds), 'GC time', labels, GC_REPORT_BUCKETS)
+            gc_duration_seconds: ::Gitlab::Metrics.histogram(metric_name(:gc, :duration_seconds), 'GC time', labels, GC_REPORT_BUCKETS),
+            heap_fragmentation: ::Gitlab::Metrics.gauge(metric_name(:gc_stat_ext, :heap_fragmentation), 'Ruby heap fragmentation', labels)
           }
 
           GC.stat.keys.each do |key|
@@ -76,8 +77,13 @@ def sample_gc
           end
 
           # Collect generic GC stats
-          GC.stat.each do |key, value|
-            metrics[key].set(labels, value)
+          GC.stat.then do |gc_stat|
+            gc_stat.each do |key, value|
+              metrics[key].set(labels, value)
+            end
+
+            # Collect custom GC stats
+            metrics[:heap_fragmentation].set(labels, Memory.gc_heap_fragmentation(gc_stat))
           end
         end
 
diff --git a/metrics_server/dependencies.rb b/metrics_server/dependencies.rb
index 3f188658ba298b58fc694019d6579eb7d5e9bb22..233511eb505e4f11b3267ddcd67db166bf18e218 100644
--- a/metrics_server/dependencies.rb
+++ b/metrics_server/dependencies.rb
@@ -20,6 +20,7 @@
 require_relative '../lib/gitlab/metrics/prometheus'
 require_relative '../lib/gitlab/metrics'
 require_relative '../lib/gitlab/metrics/system'
+require_relative '../lib/gitlab/metrics/memory'
 require_relative '../lib/gitlab/metrics/samplers/base_sampler'
 require_relative '../lib/gitlab/metrics/samplers/ruby_sampler'
 require_relative '../lib/gitlab/metrics/exporter/base_exporter'
diff --git a/spec/lib/gitlab/metrics/memory_spec.rb b/spec/lib/gitlab/metrics/memory_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..fd8ca3b37c634eb56047f6ad9160b18ca90bf217
--- /dev/null
+++ b/spec/lib/gitlab/metrics/memory_spec.rb
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+RSpec.describe Gitlab::Metrics::Memory do
+  describe '.gc_heap_fragmentation' do
+    subject(:call) do
+      described_class.gc_heap_fragmentation(
+        heap_live_slots: gc_stat_heap_live_slots,
+        heap_eden_pages: gc_stat_heap_eden_pages
+      )
+    end
+
+    context 'when the Ruby heap is perfectly utilized' do
+      # All objects are located in a single heap page.
+      let(:gc_stat_heap_live_slots) { described_class::HEAP_SLOTS_PER_PAGE }
+      let(:gc_stat_heap_eden_pages) { 1 }
+
+      it { is_expected.to eq(0) }
+    end
+
+    context 'when the Ruby heap is greatly fragmented' do
+      # There is one object per heap page.
+      let(:gc_stat_heap_live_slots) { described_class::HEAP_SLOTS_PER_PAGE }
+      let(:gc_stat_heap_eden_pages) { described_class::HEAP_SLOTS_PER_PAGE }
+
+      # The heap can never be "perfectly fragmented" because that would require
+      # zero objects per page.
+      it { is_expected.to be > 0.99 }
+    end
+
+    context 'when the Ruby heap is semi-fragmented' do
+      # All objects are spread over two pages i.e. each page is 50% utilized.
+      let(:gc_stat_heap_live_slots) { described_class::HEAP_SLOTS_PER_PAGE }
+      let(:gc_stat_heap_eden_pages) { 2 }
+
+      it { is_expected.to eq(0.5) }
+    end
+
+    context 'when the Ruby heap has no eden pages' do
+      # The ratio is undefined without any slots, so the guard value is expected.
+      let(:gc_stat_heap_live_slots) { 0 }
+      let(:gc_stat_heap_eden_pages) { 0 }
+
+      it { is_expected.to eq(0) }
+    end
+  end
+end
diff --git a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb
index dfae5aa678450ace8f64d3a459a1a543389ab7f9..b1566ffa7b4603618c046be1f31b78322f1e232e 100644
--- a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb
+++ b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb
@@ -125,5 +125,11 @@
 
       sampler.sample
     end
+
+    it 'adds a heap fragmentation metric' do
+      expect(sampler.metrics[:heap_fragmentation]).to receive(:set).with({}, anything)
+
+      sampler.sample
+    end
   end
 end