From 25741094b0bfc4c1c73300c6311b0c53146a43b2 Mon Sep 17 00:00:00 2001 From: Aakriti Gupta <agupta@gitlab.com> Date: Fri, 25 Oct 2024 11:31:03 +0200 Subject: [PATCH] WIP Make database target work --- .../backup/cli/database/configuration.rb | 50 ++++++----------- .../gitlab/backup/cli/database/postgres.rb | 2 + .../lib/gitlab/backup/cli/database/wrapper.rb | 2 +- .../lib/gitlab/backup/cli/targets/database.rb | 53 ++++++------------- 4 files changed, 35 insertions(+), 72 deletions(-) diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/configuration.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/configuration.rb index 4cc853d62214e..596a353e028b7 100644 --- a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/configuration.rb +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/configuration.rb @@ -7,47 +7,27 @@ module Backup module Cli module Database class Configuration - # Connection name is the key used in `config/database.yml` for multi-database connection configuration - # - # @return [String] - attr_reader :connection_name, :activerecord_variables + attr_reader :connection_name, :db_wrapper - # ActiveRecord base model that is configured to connect to the database identified by connection_name key - # - # @return [ActiveRecord::Base] - attr_reader :source_model, :db_wrapper - - # Initializes configuration - # - # @param [String] connection_name the key from `database.yml` for multi-database connection configuration def initialize(connection_name, db_wrapper) @connection_name = connection_name @db_wrapper = db_wrapper - @source_model = db_wrapper.database_base_models_with_gitlab_shared[connection_name] || - db_wrapper.database_base_models_with_gitlab_shared['main'] - @activerecord_database_config = ActiveRecord::Base.configurations.find_db_config(connection_name) || - ActiveRecord::Base.configurations.find_db_config('main') - end - - # # Return the HashConfig for the database - # # - # # @return 
[ActiveRecord::DatabaseConfigurations::HashConfig] - def activerecord_configuration - ActiveRecord::DatabaseConfigurations::HashConfig.new( - @activerecord_database_config&.env_name || db_wrapper.context.env, - connection_name, - activerecord_variables - ) end - private - - # Return the database configuration from rails config/database.yml file - # in the format expected by ActiveRecord::DatabaseConfigurations::HashConfig - # - # @return [Hash] configuration hash - def original_activerecord_config - @activerecord_database_config.configuration_hash.dup + # TODO read config from yaml/context based on connection_name + def activerecord_database_config + @activerecord_database_config ||= + { + :adapter=>"postgresql", + :encoding=>"unicode", + :database=>"gitlabhq_development", + :host=>"/Users/aakritigupta/Development/gdk/postgresql", + :port=>5432, + :pool=>10, + :gssencmode=>"disable", + :prepared_statements=>false, + :variables=>{"statement_timeout"=>"120s"} + } end end end end diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/postgres.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/postgres.rb index 83d3002a1810e..88b3130fb1dcd 100644 --- a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/postgres.rb +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/postgres.rb @@ -16,6 +16,8 @@ class Postgres def dump(output_file_path, pg_dump) compress_rd, compress_wr = IO.pipe + # TODO use Shell::Pipeline here + # TODO backport to legacy tool as well compress_pid = spawn(compress_cmd, in: compress_rd, out: [output_file_path, 'w', FILE_PERMISSION]) compress_rd.close diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/wrapper.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/wrapper.rb index 56d8f03ae15cf..73cd7a024519f 100644 --- a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/wrapper.rb +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/wrapper.rb @@ -49,7 +49,7 @@ def
initialize(context) # This is an extensive list of postgres schemas owned by GitLab # It does not include the default public schema - # EXTRA_SCHEMAS = [DYNAMIC_PARTITIONS_SCHEMA, STATIC_PARTITIONS_SCHEMA].freeze + EXTRA_SCHEMAS = [DYNAMIC_PARTITIONS_SCHEMA, STATIC_PARTITIONS_SCHEMA].freeze # PRIMARY_DATABASE_NAME = ActiveRecord::Base.connection_db_config.name.to_sym # rubocop:disable Database/MultipleDatabases diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/targets/database.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/targets/database.rb index a658ea173b463..4ef97e8fe9583 100644 --- a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/targets/database.rb +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/targets/database.rb @@ -30,7 +30,7 @@ def dump(destination_dir) each_database(destination_dir) do |backup_connection| pg_env = backup_connection.database_configuration.pg_env_variables - active_record_config = backup_connection.database_configuration.activerecord_variables + active_record_config = backup_connection.database_configuration.activerecord_database_config pg_database_name = active_record_config[:database] dump_file_name = file_name(destination_dir, backup_connection.connection_name) @@ -38,20 +38,12 @@ def dump(destination_dir) Gitlab::Backup::Cli::Output.print_info("Dumping PostgreSQL database #{pg_database_name} ... ") - schemas = [] - - if context.backup_pg_schema - schemas << context.backup_pg_schema - schemas.push(*databse::EXTRA_SCHEMAS.map(&:to_s)) - end - pg_dump = ::Gitlab::Backup::Cli::Utils::PgDump.new( database_name: pg_database_name, snapshot_id: backup_connection.snapshot_id, schemas: schemas, env: pg_env) - # TODO success = ::Gitlab::Backup::Cli::Database::Postgres.new.dump(dump_file_name, pg_dump) backup_connection.release_snapshot! if backup_connection.snapshot_id @@ -60,30 +52,16 @@ def dump(destination_dir) report_finish_status(success) end - ensure - if database_wrapper.multiple_databases?
- ::Gitlab::Backup::Cli::Database::EachDatabase.each_connection( - only: base_models_for_backup.keys, include_shared: false - ) do |_, database_connection_name| - backup_connection = ::Gitlab::Backup::Cli::Database::Connection.new(database_connection_name, database_wrapper) - backup_connection.restore_timeouts! - rescue ActiveRecord::ConnectionNotEstablished - raise DatabaseBackupError.new( - backup_connection.database_configuration.activerecord_variables, - file_name(destination_dir, database_connection_name) - ) - end - end + # TODO Handle db connection timeouts end def restore(destination_dir) base_models_for_backup.each do |database_name, _| backup_connection = ::Gitlab::Backup::Cli::Database::Connection.new(database_name, database_wrapper) - config = backup_connection.database_configuration.activerecord_variables + database = backup_connection.database_configuration.activerecord_database_config[:database] - db_file_name = file_name(destination_dir, database_name) - database = config[:database] + db_file_name = file_name(destination_dir, database_name) unless File.exist?(db_file_name) if main_database?(database_name) @@ -129,6 +107,17 @@ def restore(destination_dir) protected + def schemas + schemas = [] + + if context.backup_pg_schema + schemas << context.backup_pg_schema + schemas.push(*database_wrapper.class::EXTRA_SCHEMAS.map(&:to_s)) + end + + schemas + end + def base_models_for_backup @base_models_for_backup ||= database_wrapper.database_base_models_with_gitlab_shared end @@ -156,7 +145,6 @@ def report_finish_status(status) def drop_tables(database_name) Gitlab::Backup::Cli::Output.info 'Cleaning the database ... ' - # TODO if Rake::Task.task_defined?
"gitlab:db:drop_tables:#{database_name}" Rake::Task["gitlab:db:drop_tables:#{database_name}"].invoke else @@ -167,21 +155,14 @@ def drop_tables(database_name) Gitlab::Backup::Cli::Output.print_tag(:success) end - # @deprecated This will be removed when restore operation is refactored to use extended_env directly - def with_transient_pg_env(extended_env) - ENV.merge!(extended_env) - result = yield - ENV.reject! { |k, _| extended_env.key?(k) } - - result - end - def pg_restore_cmd(database, pg_env) Shell::Command.new('psql', database, env: pg_env) end def each_database(destination_dir, &block) databases = [] + # TODO rewrite to read config and pick only dbs that don't have database_tasks: false + # each connection will loop through all database connections defined in `database.yml` # and reject the ones that are shared, so we don't get duplicates -- GitLab