diff --git a/lib/gitlab/database/sos.rb b/lib/gitlab/database/sos.rb
index f6e7c84548c7bdb43569c2be8cff02e4e0a40c20..f24f49d09fedc7acf4775bf0bec58d237ce87218 100644
--- a/lib/gitlab/database/sos.rb
+++ b/lib/gitlab/database/sos.rb
@@ -4,15 +4,40 @@
 module Gitlab
   module Database
     module Sos
-      TASKS = [
+      # Total time .run keeps collecting repeated samples.
+      DURATION = 5.minutes
+      # Pause between two consecutive sampling rounds.
+      SAMPLING_INTERVAL = 3.seconds
+
+      # Tasks executed once per database connection.
+      SINGLE_TASKS = [
         Sos::ArSchemaDump,
         Sos::DbStatsActivity
       ].freeze
 
+      # Tasks re-run for every connection on each sampling round.
+      LONG_RUNNING_TASKS = [
+        Sos::DbLoopStatsActivity
+      ].freeze
+
       def self.run(output_file)
         Output.writing(output_file, mode: :directory) do |output|
           Gitlab::Database::EachDatabase.each_connection(include_shared: false) do |conn, name|
-            TASKS.each { |t| t.new(conn, name, output).run }
+            SINGLE_TASKS.each do |t|
+              t.new(conn, name, output).run
+            end
+          end
+
+          # Keep sampling until the deadline has passed.
+          deadline = DURATION.from_now
+
+          while deadline.future?
+            Gitlab::Database::EachDatabase.each_connection(include_shared: false) do |conn, name|
+              LONG_RUNNING_TASKS.each do |t|
+                t.new(conn, name, output).run
+              end
+            end
+            sleep(SAMPLING_INTERVAL)
           end
         end
       end
diff --git a/lib/gitlab/database/sos/base_db_stats_handler.rb b/lib/gitlab/database/sos/base_db_stats_handler.rb
index 703a3747d6bd66c8139bfd90b7d88496308b0d31..93c525b5393d5f9a229465605357bcfedaffb386 100644
--- a/lib/gitlab/database/sos/base_db_stats_handler.rb
+++ b/lib/gitlab/database/sos/base_db_stats_handler.rb
@@ -19,17 +19,26 @@ def execute_query(query)
           []
         end
 
-        def write_to_csv(query_name, result)
-          file_path = File.join(name, "#{query_name}.csv")
+        # Writes result as CSV below the database's directory. With
+        # include_timestamp: true the file is <query_name>/<timestamp>.csv,
+        # otherwise <query_name>.csv. Failures are logged, not raised.
+        def write_to_csv(query_name, result, include_timestamp: false)
+          timestamp = Time.zone.now.strftime("%Y%m%d_%H%M%S")
+
+          file_path = if include_timestamp
+                        File.join(name, query_name.to_s, "#{timestamp}.csv")
+                      else
+                        File.join(name, "#{query_name}.csv")
+                      end
 
           output.write_file(file_path) do |f|
-            CSV.open(f, 'w+') do |csv|
+            CSV.open(f, "w+") do |csv|
               csv << result.fields
               result.each { |row| csv << row.values }
             end
           end
         rescue StandardError => e
-          Gitlab::AppLogger.error("Error writing CSV for DB:#{name} query:#{query_name} error message:#{e.message}")
+          Gitlab::AppLogger.error("Error writing CSV for DB:#{name} query:#{query_name} error_message:#{e.message}")
         end
       end
     end
diff --git a/lib/gitlab/database/sos/db_loop_stats_activity.rb b/lib/gitlab/database/sos/db_loop_stats_activity.rb
new file mode 100644
index 0000000000000000000000000000000000000000..241d75bc22b558f5d303da1c418022fe81930b08
--- /dev/null
+++ b/lib/gitlab/database/sos/db_loop_stats_activity.rb
@@ -0,0 +1,84 @@
+# frozen_string_literal: true
+
+require 'csv'
+
+module Gitlab
+  module Database
+    module Sos
+      # Runs every query in QUERIES (each selects now() AS timestamp) and
+      # writes each result to its own timestamped CSV file.
+      class DbLoopStatsActivity < BaseDbStatsHandler
+        QUERIES = {
+          pg_stat_user_tables: <<~SQL,
+            SELECT now() AS timestamp, *
+            FROM pg_stat_user_tables;
+          SQL
+
+          pg_stat_user_indexes: <<~SQL,
+            SELECT now() AS timestamp, *
+            FROM pg_stat_user_indexes;
+          SQL
+
+          pg_statio_user_tables: <<~SQL,
+            SELECT now() AS timestamp, *
+            FROM pg_statio_user_tables;
+          SQL
+
+          pg_statio_user_indexes: <<~SQL,
+            SELECT now() AS timestamp, *
+            FROM pg_statio_user_indexes;
+          SQL
+
+          table_relation_size: <<~SQL.squish,
+            SELECT
+              now() AS timestamp,
+              n.nspname || '.' || c.relname AS "relation",
+              pg_total_relation_size(c.oid) AS "total_size_bytes"
+            FROM
+              pg_class c
+            JOIN
+              pg_namespace n ON n.oid = c.relnamespace
+            WHERE
+              n.nspname NOT IN ('pg_catalog', 'information_schema', 'pg_toast')
+            ORDER BY
+              pg_total_relation_size(c.oid) DESC;
+          SQL
+
+          pg_lock_stat_activity: <<~SQL.squish
+            SELECT
+              now() AS timestamp,
+              a.pid,
+              a.usename,
+              a.application_name,
+              a.client_addr,
+              a.backend_start,
+              a.query_start,
+              a.state,
+              a.wait_event_type,
+              a.wait_event,
+              a.query,
+              l.locktype,
+              l.mode,
+              l.granted,
+              l.relation::regclass AS locked_relation
+            FROM
+              pg_stat_activity a
+            LEFT JOIN
+              pg_locks l ON l.pid = a.pid
+            WHERE
+              a.state != 'idle'
+            ORDER BY
+              a.query_start DESC;
+          SQL
+        }.freeze
+
+        def run
+          QUERIES.each do |query_name, query|
+            result = execute_query(query)
+            write_to_csv(query_name, result, include_timestamp: true)
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/database/sos/base_db_stats_handler_spec.rb b/spec/lib/gitlab/database/sos/base_db_stats_handler_spec.rb
index f085e8d20d726b03930e0b0a3e68d79576f05a28..319e896a47f1f70916e45ed409d95ce3d38ccbc7 100644
--- a/spec/lib/gitlab/database/sos/base_db_stats_handler_spec.rb
+++ b/spec/lib/gitlab/database/sos/base_db_stats_handler_spec.rb
@@ -5,20 +5,26 @@
 RSpec.describe Gitlab::Database::Sos::BaseDbStatsHandler, feature_category: :database do
   let(:temp_directory) { Dir.mktmpdir }
   let(:output_file_path) { temp_directory }
-  let(:expected_file_path) { File.join(output_file_path, db_name, "#{query.each_key.first}.csv") }
   let(:output) { Gitlab::Database::Sos::Output.new(output_file_path, mode: :directory) }
   let(:db_name) { 'test_db' }
   let(:connection) { ApplicationRecord.connection }
   let(:handler) { described_class.new(connection, db_name, output) }
-  let(:query) { { pg_show_all_settings: "SHOW ALL;" } }
-  let(:result) { ApplicationRecord.connection.execute(query[:pg_show_all_settings]) }
+  let(:queries) do
+    {
+      pg_show_all_settings: 'SHOW ALL;',
+      pg_statio_user_tables: 'SELECT now() AS timestamp, * FROM pg_statio_user_tables;'
+    }
+  end
 
-  before do
-    allow(Gitlab::Database::Sos::DbStatsActivity).to receive(:queries).and_return({
-      pg_show_all_settings: 'SHOW ALL;'
-    })
+  let(:result) { ApplicationRecord.connection.execute(queries[:pg_show_all_settings]) }
+  let(:result_with_timestamp) { ApplicationRecord.connection.execute(queries[:pg_statio_user_tables]) }
+  let(:timestamp) { Time.zone.now.strftime("%Y%m%d_%H%M%S") }
+  let(:file_path_with_timestamp) do
+    File.join(output_file_path, db_name, queries.keys.last.to_s, "#{timestamp}.csv")
   end
 
+  let(:file_path_without_timestamp) { File.join(output_file_path, db_name, "#{queries.each_key.first}.csv") }
+
   after do
     FileUtils.remove_entry(temp_directory)
   end
@@ -34,7 +40,7 @@
   describe '#execute_query' do
     context "when a query is sucessfully executed" do
       it 'executes the query and returns the result' do
-        result = handler.execute_query(query[:pg_show_all_settings])
+        result = handler.execute_query(queries[:pg_show_all_settings])
         expect(result).to be_an(PG::Result)
         expect(result.ntuples).to be > 0
       end
@@ -55,14 +61,26 @@
   end
 
   describe '#write_to_csv' do
+    before do
+      allow(Time.zone).to receive(:now).and_return(Time.zone.parse('2023-01-01 12:00:00 UTC'))
+
+      allow(Gitlab::Database::Sos::DbStatsActivity).to receive(:queries).and_return({
+        pg_show_all_settings: 'SHOW ALL;'
+      })
+
+      allow(Gitlab::Database::Sos::DbLoopStatsActivity).to receive(:queries).and_return({
+        pg_statio_user_tables: 'SELECT now() AS timestamp, * FROM pg_statio_user_tables;'
+      })
+    end
+
     context 'when result exists' do
-      it 'creates a CSV file with the correct headers and data (if applicable)' do
-        handler.write_to_csv(query.each_key.first, result)
+      it 'creates a CSV file with the correct headers and data (if applicable) without timestamps' do
+        handler.write_to_csv(queries.each_key.first, result)
         output.finish
 
-        expect(File.exist?(expected_file_path)).to be true
+        expect(File.exist?(file_path_without_timestamp)).to be true
 
-        csv_content = CSV.read(expected_file_path)
+        csv_content = CSV.read(file_path_without_timestamp)
 
         expect(csv_content.first).to eq(%w[name setting description])
 
@@ -73,17 +91,36 @@
         # it's safe to say this value will not change for us.
         expect(block_size_row[1]).to eq('8192')
       end
+
+      it 'creates a CSV file with the correct headers and data (if applicable) with timestamps' do
+        handler.write_to_csv(queries.keys.last, result_with_timestamp, include_timestamp: true)
+        output.finish
+
+        expect(File.exist?(file_path_with_timestamp)).to be true
+
+        csv_content = CSV.read(file_path_with_timestamp)
+
+        expect(csv_content.first).to include("timestamp", "relid", "schemaname")
+      end
     end
 
     context 'when result is empty' do
       let(:empty_result) { [] }
 
-      it 'creates an empty CSV file' do
-        handler.write_to_csv(query.each_key.first, empty_result)
+      it 'creates an empty CSV file without timestamp' do
+        handler.write_to_csv(queries.each_key.first, empty_result)
+        output.finish
+
+        expect(File.exist?(file_path_without_timestamp)).to be true
+        expect(File.zero?(file_path_without_timestamp)).to be true
+      end
+
+      it 'creates an empty CSV file with timestamp' do
+        handler.write_to_csv(queries.keys.last, empty_result, include_timestamp: true)
         output.finish
 
-        expect(File.exist?(expected_file_path)).to be true
-        expect(File.zero?(expected_file_path)).to be true
+        expect(File.exist?(file_path_with_timestamp)).to be true
+        expect(File.zero?(file_path_with_timestamp)).to be true
       end
     end
 
@@ -94,9 +131,9 @@
 
       it 'logs the error' do
         expect(Gitlab::AppLogger).to receive(:error) do |message|
-          expect(message).to include("Error writing CSV for DB:#{db_name} query:#{query.each_key.first} error message")
+          expect(message).to include("Error writing CSV for DB:#{db_name} query:#{queries.each_key.first} ")
         end
-        handler.write_to_csv(query.each_key.first, result)
+        handler.write_to_csv(queries.each_key.first, result)
       end
     end
   end
diff --git a/spec/lib/gitlab/database/sos/db_loop_stats_activity_spec.rb b/spec/lib/gitlab/database/sos/db_loop_stats_activity_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..4586ef8c397649ff3f866d1a02d4f12aff2a23f6
--- /dev/null
+++ b/spec/lib/gitlab/database/sos/db_loop_stats_activity_spec.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Sos::DbLoopStatsActivity, feature_category: :database do
+  let(:temp_directory) { Dir.mktmpdir }
+  let(:output_file_path) { temp_directory }
+  let(:output) { Gitlab::Database::Sos::Output.new(temp_directory, mode: :directory) }
+  let(:db_name) { 'test_db' }
+  let(:connection) { ApplicationRecord.connection }
+  let(:handler) { described_class.new(connection, db_name, output) }
+  let(:query) { { pg_stat_user_indexes: "SELECT * FROM pg_stat_user_indexes;" } }
+  let(:result) { ApplicationRecord.connection.execute(query[:pg_stat_user_indexes]) }
+  let(:timestamp) { Time.zone.now.strftime("%Y%m%d_%H%M%S") }
+
+  after do
+    FileUtils.remove_entry(temp_directory)
+  end
+
+  describe '#run' do
+    it 'successfully writes each query result to csv' do
+      expect_next_instance_of(Gitlab::Database::Sos::Output) do |instance|
+        expect(instance).to receive(:write_file).exactly(described_class::QUERIES.count).times
+      end
+      handler.run
+    end
+  end
+
+  describe 'individual queries' do
+    described_class::QUERIES.each do |name, query|
+      it "successfully executes and returns results for #{name}" do
+        result = handler.execute_query(query)
+
+        expect(result).to be_a(PG::Result)
+        expect(result.nfields).to be > 0
+
+        case name
+        when :pg_stat_user_tables
+          expect(result.fields).to include("timestamp", "relid", "schemaname", "relname", "seq_scan")
+        when :pg_stat_user_indexes
+          expect(result.fields).to include("timestamp", "relid", "indexrelid", "schemaname", "relname")
+        when :pg_statio_user_tables
+          expect(result.fields).to include("timestamp", "relid", "schemaname", "relname", "heap_blks_read")
+        when :pg_statio_user_indexes
+          expect(result.fields).to include("timestamp", "relid", "indexrelid", "schemaname", "relname", "idx_blks_read")
+        when :table_relation_size
+          expect(result.fields).to eq %w[timestamp relation total_size_bytes]
+        when :pg_lock_stat_activity
+          expect(result.fields).to include("timestamp", "pid", "usename", "application_name", "client_addr")
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/database/sos_spec.rb b/spec/lib/gitlab/database/sos_spec.rb
index a85ebb36a31563c7509f8252a43f861e26e780b6..8248a82c05436c81ceafd138ae4b0e3e90cce38d 100644
--- a/spec/lib/gitlab/database/sos_spec.rb
+++ b/spec/lib/gitlab/database/sos_spec.rb
@@ -2,22 +2,27 @@
 
 require 'spec_helper'
 
-# WIP
 RSpec.describe Gitlab::Database::Sos, feature_category: :database do
   describe '#run' do
     let(:temp_directory) { Dir.mktmpdir }
     let(:output_file_path) { temp_directory }
-    let(:task) { Gitlab::Database::Sos::DbStatsActivity }
+    let(:connection) { ApplicationRecord.connection }
+    let(:db_name) { 'test_db' }
+
+    before do
+      # Shrink the sampling window and interval that .run actually reads.
+      stub_const("#{described_class}::DURATION", 3.seconds)
+      stub_const("#{described_class}::SAMPLING_INTERVAL", 1.second)
+      allow(Gitlab::Database::EachDatabase).to receive(:each_connection).and_yield(connection, db_name)
+    end
 
     after do
       FileUtils.remove_entry(temp_directory)
     end
 
-    it "creates temp directory of pg data" do
-      stub_const("#{described_class}::TASKS", [task])
-      result = described_class.run(output_file_path)
-      expect(result.size).to be >= 1
-      expect(Dir.glob(File.join(temp_directory, '**', '*.csv'))).not_to be_empty
+    it "creates a temp directory of pg data" do
+      described_class.run(output_file_path)
+      expect(Dir.glob(File.join(output_file_path, '**', '*.csv'))).not_to be_empty
     end
   end
 end