From 20bff3bca6d08e219c5b57f819923f0608cdb9e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20K=C3=A4ppler?= <mkaeppler@gitlab.com> Date: Tue, 5 May 2020 14:33:56 +0000 Subject: [PATCH] Report USS+PSS from ruby_sampler Reports the unique & proportional set size into prometheus. Change is behind a feature toggle. --- lib/gitlab/metrics/samplers/ruby_sampler.rb | 31 +++-- lib/gitlab/metrics/system.rb | 81 +++++++------ .../metrics/samplers/ruby_sampler_spec.rb | 17 ++- spec/lib/gitlab/metrics/system_spec.rb | 113 ++++++++++++++++-- 4 files changed, 178 insertions(+), 64 deletions(-) diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb index c38769f39a933..5cd2a86a10663 100644 --- a/lib/gitlab/metrics/samplers/ruby_sampler.rb +++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb @@ -34,14 +34,16 @@ def labels def init_metrics metrics = { - file_descriptors: ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels), - memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels), - process_cpu_seconds_total: ::Gitlab::Metrics.gauge(with_prefix(:process, :cpu_seconds_total), 'Process CPU seconds total'), - process_max_fds: ::Gitlab::Metrics.gauge(with_prefix(:process, :max_fds), 'Process max fds'), - process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels), - process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'), - sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels), - gc_duration_seconds: ::Gitlab::Metrics.histogram(with_prefix(:gc, :duration_seconds), 'GC time', labels, GC_REPORT_BUCKETS) + file_descriptors: ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels), + memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:memory, 
:bytes), 'Memory used (RSS)', labels), + process_cpu_seconds_total: ::Gitlab::Metrics.gauge(with_prefix(:process, :cpu_seconds_total), 'Process CPU seconds total'), + process_max_fds: ::Gitlab::Metrics.gauge(with_prefix(:process, :max_fds), 'Process max fds'), + process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used (RSS)', labels), + process_unique_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :unique_memory_bytes), 'Memory used (USS)', labels), + process_proportional_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :proportional_memory_bytes), 'Memory used (PSS)', labels), + process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'), + sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels), + gc_duration_seconds: ::Gitlab::Metrics.histogram(with_prefix(:gc, :duration_seconds), 'GC time', labels, GC_REPORT_BUCKETS) } GC.stat.keys.each do |key| @@ -85,10 +87,15 @@ def sample_gc_reports end def set_memory_usage_metrics - memory_usage = System.memory_usage - - metrics[:memory_bytes].set(labels, memory_usage) - metrics[:process_resident_memory_bytes].set(labels, memory_usage) + memory_rss = System.memory_usage + metrics[:memory_bytes].set(labels, memory_rss) + metrics[:process_resident_memory_bytes].set(labels, memory_rss) + + if Feature.enabled?(:collect_memory_uss_pss) + memory_uss_pss = System.memory_usage_uss_pss + metrics[:process_unique_memory_bytes].set(labels, memory_uss_pss[:uss]) + metrics[:process_proportional_memory_bytes].set(labels, memory_uss_pss[:pss]) + end end end end diff --git a/lib/gitlab/metrics/system.rb b/lib/gitlab/metrics/system.rb index 2a61b3de405d5..d01b6bc5b5012 100644 --- a/lib/gitlab/metrics/system.rb +++ b/lib/gitlab/metrics/system.rb @@ -7,47 +7,37 @@ module Metrics # This module relies on the /proc filesystem being 
available. If /proc is # not available the methods of this module will be stubbed. module System - if File.exist?('/proc') - # Returns the current process' memory usage in bytes. - def self.memory_usage - mem = 0 - match = File.read('/proc/self/status').match(/VmRSS:\s+(\d+)/) - - if match && match[1] - mem = match[1].to_f * 1024 - end - - mem - end - - def self.file_descriptor_count - Dir.glob('/proc/self/fd/*').length - end - - def self.max_open_file_descriptors - match = File.read('/proc/self/limits').match(/Max open files\s*(\d+)/) - - return unless match && match[1] + PROC_STATUS_PATH = '/proc/self/status' + PROC_SMAPS_ROLLUP_PATH = '/proc/self/smaps_rollup' + PROC_LIMITS_PATH = '/proc/self/limits' + PROC_FD_GLOB = '/proc/self/fd/*' + + PRIVATE_PAGES_PATTERN = /^(Private_Clean|Private_Dirty|Private_Hugetlb):\s+(?<value>\d+)/.freeze + PSS_PATTERN = /^Pss:\s+(?<value>\d+)/.freeze + RSS_PATTERN = /VmRSS:\s+(?<value>\d+)/.freeze + MAX_OPEN_FILES_PATTERN = /Max open files\s*(?<value>\d+)/.freeze + + # Returns the current process' RSS (resident set size) in bytes. + def self.memory_usage + sum_matches(PROC_STATUS_PATH, rss: RSS_PATTERN)[:rss].kilobytes + end - match[1].to_i - end - else - def self.memory_usage - 0.0 - end + # Returns the current process' USS/PSS (unique/proportional set size) in bytes. + def self.memory_usage_uss_pss + sum_matches(PROC_SMAPS_ROLLUP_PATH, uss: PRIVATE_PAGES_PATTERN, pss: PSS_PATTERN) + .transform_values(&:kilobytes) + end - def self.file_descriptor_count - 0 - end + def self.file_descriptor_count + Dir.glob(PROC_FD_GLOB).length + end - def self.max_open_file_descriptors - 0 - end + def self.max_open_file_descriptors + sum_matches(PROC_LIMITS_PATH, max_fds: MAX_OPEN_FILES_PATTERN)[:max_fds] end def self.cpu_time - Process - .clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :float_second) + Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :float_second) end # Returns the current real time in a given precision. 
@@ -78,6 +68,27 @@ def self.thread_cpu_duration(start_time) end_time - start_time end + + # Given a path to a file in /proc and a hash of (metric, pattern) pairs, + # sums up all values found for those patterns under the respective metric. + def self.sum_matches(proc_file, **patterns) + results = patterns.transform_values { 0 } + + begin + File.foreach(proc_file) do |line| + patterns.each do |metric, pattern| + match = line.match(pattern) + value = match&.named_captures&.fetch('value', 0) + results[metric] += value.to_i + end + end + rescue Errno::ENOENT + # This means the procfile we're reading from did not exist; + # this is safe to ignore, since we initialize each metric to 0 + end + + results + end end end end diff --git a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb index 8c4071a7ed169..9d8ec2d9b2153 100644 --- a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb +++ b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb @@ -19,20 +19,19 @@ end describe '#sample' do - it 'samples various statistics' do - expect(Gitlab::Metrics::System).to receive(:cpu_time) - expect(Gitlab::Metrics::System).to receive(:file_descriptor_count) - expect(Gitlab::Metrics::System).to receive(:memory_usage) - expect(Gitlab::Metrics::System).to receive(:max_open_file_descriptors) - expect(sampler).to receive(:sample_gc) + it 'adds a metric containing the process resident memory bytes' do + expect(Gitlab::Metrics::System).to receive(:memory_usage).and_return(9000) + + expect(sampler.metrics[:process_resident_memory_bytes]).to receive(:set).with({}, 9000) sampler.sample end - it 'adds a metric containing the process resident memory bytes' do - expect(Gitlab::Metrics::System).to receive(:memory_usage).and_return(9000) + it 'adds a metric containing the process unique and proportional memory bytes' do + expect(Gitlab::Metrics::System).to receive(:memory_usage_uss_pss).and_return(uss: 9000, pss: 10_000) - 
expect(sampler.metrics[:process_resident_memory_bytes]).to receive(:set).with({}, 9000) + expect(sampler.metrics[:process_unique_memory_bytes]).to receive(:set).with({}, 9000) + expect(sampler.metrics[:process_proportional_memory_bytes]).to receive(:set).with({}, 10_000) sampler.sample end diff --git a/spec/lib/gitlab/metrics/system_spec.rb b/spec/lib/gitlab/metrics/system_spec.rb index a5aa80686fdf9..37d26bd9d63e2 100644 --- a/spec/lib/gitlab/metrics/system_spec.rb +++ b/spec/lib/gitlab/metrics/system_spec.rb @@ -3,33 +3,122 @@ require 'spec_helper' describe Gitlab::Metrics::System do - if File.exist?('/proc') + context 'when /proc files exist' do + # Fixtures pulled from: + # Linux carbon 5.3.0-7648-generic #41~1586789791~19.10~9593806-Ubuntu SMP Mon Apr 13 17:50:40 UTC x86_64 x86_64 x86_64 GNU/Linux + let(:proc_status) do + # most rows omitted for brevity + <<~SNIP + Name: less + VmHWM: 2468 kB + VmRSS: 2468 kB + RssAnon: 260 kB + SNIP + end + + let(:proc_smaps_rollup) do + # full snapshot + <<~SNIP + Rss: 2564 kB + Pss: 503 kB + Pss_Anon: 312 kB + Pss_File: 191 kB + Pss_Shmem: 0 kB + Shared_Clean: 2100 kB + Shared_Dirty: 0 kB + Private_Clean: 152 kB + Private_Dirty: 312 kB + Referenced: 2564 kB + Anonymous: 312 kB + LazyFree: 0 kB + AnonHugePages: 0 kB + ShmemPmdMapped: 0 kB + Shared_Hugetlb: 0 kB + Private_Hugetlb: 0 kB + Swap: 0 kB + SwapPss: 0 kB + Locked: 0 kB + SNIP + end + + let(:proc_limits) do + # full snapshot + <<~SNIP + Limit Soft Limit Hard Limit Units + Max cpu time unlimited unlimited seconds + Max file size unlimited unlimited bytes + Max data size unlimited unlimited bytes + Max stack size 8388608 unlimited bytes + Max core file size 0 unlimited bytes + Max resident set unlimited unlimited bytes + Max processes 126519 126519 processes + Max open files 1024 1048576 files + Max locked memory 67108864 67108864 bytes + Max address space unlimited unlimited bytes + Max file locks unlimited unlimited locks + Max pending signals 126519 126519 signals + 
Max msgqueue size 819200 819200 bytes + Max nice priority 0 0 + Max realtime priority 0 0 + Max realtime timeout unlimited unlimited us + SNIP + end + describe '.memory_usage' do - it "returns the process' memory usage in bytes" do - expect(described_class.memory_usage).to be > 0 + it "returns the process' resident set size (RSS) in bytes" do + mock_existing_proc_file('/proc/self/status', proc_status) + + expect(described_class.memory_usage).to eq(2527232) end end describe '.file_descriptor_count' do it 'returns the amount of open file descriptors' do - expect(described_class.file_descriptor_count).to be > 0 + expect(Dir).to receive(:glob).and_return(['/some/path', '/some/other/path']) + + expect(described_class.file_descriptor_count).to eq(2) end end describe '.max_open_file_descriptors' do it 'returns the max allowed open file descriptors' do - expect(described_class.max_open_file_descriptors).to be > 0 + mock_existing_proc_file('/proc/self/limits', proc_limits) + + expect(described_class.max_open_file_descriptors).to eq(1024) + end + end + + describe '.memory_usage_uss_pss' do + it "returns the process' unique and proportional set size (USS/PSS) in bytes" do + mock_existing_proc_file('/proc/self/smaps_rollup', proc_smaps_rollup) + + # (Private_Clean (152 kB) + Private_Dirty (312 kB) + Private_Hugetlb (0 kB)) * 1024 + expect(described_class.memory_usage_uss_pss).to eq(uss: 475136, pss: 515072) end end - else + end + + context 'when /proc files do not exist' do + before do + mock_missing_proc_file + end + describe '.memory_usage' do - it 'returns 0.0' do - expect(described_class.memory_usage).to eq(0.0) + it 'returns 0' do + expect(described_class.memory_usage).to eq(0) + end + end + + describe '.memory_usage_uss_pss' do + it "returns 0 for all components" do + expect(described_class.memory_usage_uss_pss).to eq(uss: 0, pss: 0) end end describe '.file_descriptor_count' do it 'returns 0' do + expect(Dir).to receive(:glob).and_return([]) +
expect(described_class.file_descriptor_count).to eq(0) end end @@ -98,4 +187,12 @@ expect(described_class.thread_cpu_duration(start_time)).to be_nil end end + + def mock_existing_proc_file(path, content) + allow(File).to receive(:foreach).with(path) { |_path, &block| content.each_line(&block) } + end + + def mock_missing_proc_file + allow(File).to receive(:foreach).and_raise(Errno::ENOENT) + end end -- GitLab