diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli.rb index 4b60ca1e7a97835348f161e5345211b505c7f4d6..c778f1aa99ed67ad9a2e75903128084485a3ec46 100644 --- a/gems/gitlab-backup-cli/lib/gitlab/backup/cli.rb +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli.rb @@ -16,6 +16,7 @@ module Cli autoload :BaseExecutor, 'gitlab/backup/cli/base_executor' autoload :Commands, 'gitlab/backup/cli/commands' autoload :Context, 'gitlab/backup/cli/context' + autoload :Database, 'gitlab/backup/cli/database' autoload :Dependencies, 'gitlab/backup/cli/dependencies' autoload :Errors, 'gitlab/backup/cli/errors' autoload :GitlabConfig, 'gitlab/backup/cli/gitlab_config' diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/context/source_context.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/context/source_context.rb index ba3a2caa285cc4a35ffcd451d102a2eaf952bdef..9c2a9a9d5c3a993ac5c6570bb19659bf1dfe2c74 100644 --- a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/context/source_context.rb +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/context/source_context.rb @@ -124,6 +124,14 @@ def gitaly_token gitlab_config.dig(env, 'gitaly', 'token') end + # Return the GitLab base directory + # @return [Pathname] + def gitlab_basepath + return Pathname.new(GITLAB_PATH) if GITLAB_PATH + + raise ::Gitlab::Backup::Cli::Error, 'GITLAB_PATH is missing' + end + private # Return the shared path used as a fallback base location to each blob type @@ -146,14 +154,6 @@ def absolute_path(path) Pathname(File.expand_path(path, gitlab_basepath)) end - # Return the GitLab base directory - # @return [Pathname] - def gitlab_basepath - return Pathname.new(GITLAB_PATH) if GITLAB_PATH - - raise ::Gitlab::Backup::Cli::Error, 'GITLAB_PATH is missing' - end - def gitlab_config return @gitlab_config unless @gitlab_config.nil? diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database.rb new file mode 100644 index 0000000000000000000000000000000000000000..53e599346411b46e203bbe53d67522b6d57da1f0 --- /dev/null +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Gitlab + module Backup + module Cli + module Database + autoload :Configuration, 'gitlab/backup/cli/database/configuration' + autoload :Connection, 'gitlab/backup/cli/database/connection' + autoload :EachDatabase, 'gitlab/backup/cli/database/each_database' + autoload :Postgres, 'gitlab/backup/cli/database/postgres' + autoload :Wrapper, 'gitlab/backup/cli/database/wrapper' + end + end + end +end diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/configuration.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/configuration.rb new file mode 100644 index 0000000000000000000000000000000000000000..4cc853d62214ee166f14b7f422ecb933889ee2c9 --- /dev/null +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/configuration.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +require 'active_record' + +module Gitlab + module Backup + module Cli + module Database + class Configuration + # Connection name is the key used in `config/database.yml` for multi-database connection configuration + # + # @return [String] + attr_reader :connection_name, :activerecord_variables + + # ActiveRecord base model that is configured to connect to the database identified by connection_name key + # + # @return [ActiveRecord::Base] + attr_reader :source_model, :db_wrapper + + # Initializes configuration + # + # @param [String] connection_name the key from `database.yml` for multi-database connection configuration + def initialize(connection_name, db_wrapper) + @connection_name = connection_name + @db_wrapper = db_wrapper + @source_model = db_wrapper.database_base_models_with_gitlab_shared[connection_name] || + db_wrapper.database_base_models_with_gitlab_shared['main'] + @activerecord_database_config = ActiveRecord::Base.configurations.find_db_config(connection_name) || + ActiveRecord::Base.configurations.find_db_config('main') + end + + # # Return the HashConfig for the database + # # + # # @return [ActiveRecord::DatabaseConfigurations::HashConfig] + def activerecord_configuration + ActiveRecord::DatabaseConfigurations::HashConfig.new( + @activerecord_database_config&.env_name || db_wrapper.context.env, + connection_name, + activerecord_variables + ) + end + + private + + # Return the database configuration from rails config/database.yml file + # in the format expected by ActiveRecord::DatabaseConfigurations::HashConfig + # + # @return [Hash] configuration hash + def original_activerecord_config + @activerecord_database_config.configuration_hash.dup + end + end + end + end + end +end diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/connection.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/connection.rb new file mode 100644 index 0000000000000000000000000000000000000000..68cdc20d407a0bc5a65839c937cabd425737970e --- /dev/null +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/connection.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +module Gitlab + module Backup + module Cli + module Database + class Connection + attr_reader :database_configuration, :snapshot_id + + delegate :connection_name, to: :database_configuration + delegate :connection, to: :@backup_model + + # Initializes a database connection + # + # @param [String] connection_name the key from `database.yml` for multi-database connection configuration + def initialize(connection_name, db_wrapper) + @database_configuration = Gitlab::Backup::Cli::Database::Configuration.new(connection_name, db_wrapper) + @backup_model = backup_model + @snapshot_id = nil + + configure_backup_model + end + + # Start a new transaction and run pg_export_snapshot() + # Returns the snapshot identifier + # + # @return [String] snapshot identifier + # def export_snapshot! + # disable_timeouts! + + # connection.begin_transaction(isolation: :repeatable_read) + # @snapshot_id = connection.select_value("SELECT pg_export_snapshot()") + # end + + # # Rollback the transaction to release the effects of pg_export_snapshot() + # def release_snapshot! + # return unless snapshot_id + + # connection.rollback_transaction + # @snapshot_id = nil + # end + + def disable_timeouts! + transaction_timeout_settings.disable_timeouts + end + + # def restore_timeouts! + # transaction_timeout_settings.restore_timeouts + # end + + private + + delegate :activerecord_configuration, to: :database_configuration, private: true + + def configure_backup_model + @backup_model.establish_connection(activerecord_configuration) + + # Gitlab::Database::LoadBalancing::Setup.new(@backup_model).setup + end + + # Creates a disposable model to be used to host the Backup connection only + def backup_model + klass_name = connection_name.camelize + + return "#{self.class.name}::#{klass_name}".constantize if self.class.const_defined?(klass_name.to_sym, false) + + self.class.const_set(klass_name, Class.new) + end + + def transaction_timeout_settings + Gitlab::Database::TransactionTimeoutSettings.new(connection) + end + end + end + end + end +end diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/each_database.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/each_database.rb new file mode 100644 index 0000000000000000000000000000000000000000..271d59c8b61c497870a1728b9d11818194b27f73 --- /dev/null +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/each_database.rb @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +module Gitlab + module Backup + module Cli + module Database + module EachDatabase + class << self + # def each_connection(only: nil, include_shared: true) + # selected_names = Array.wrap(only) + # base_models = select_base_models(selected_names) + + # base_models.each_pair do |connection_name, model| + # next if !include_shared && Gitlab::Database.db_config_share_with(model.connection_db_config) + + # connection = model.connection + + # with_shared_connection(connection, connection_name) do + # yield connection, connection_name + # end + # end + # end + + # def each_model_connection(models, only_on: nil, &blk) + # selected_databases = Array.wrap(only_on).map(&:to_sym) + + # models.each do |model| + # # If model is shared, iterate all available base connections + # # Example: `LooseForeignKeys::DeletedRecord` + # if model < ::Gitlab::Database::SharedModel + # with_shared_model_connections(model, selected_databases, &blk) + # else + # with_model_connection(model, selected_databases, &blk) + # end + # end + # end + + private + + def select_base_models(names) + base_models = Gitlab::Database.database_base_models_with_gitlab_shared + return base_models if names.empty? + + names.each_with_object(HashWithIndifferentAccess.new) do |name, hash| + raise ArgumentError, "#{name} is not a valid database name" unless base_models.key?(name) + + hash[name] = base_models[name] + end + end + + def with_shared_model_connections(shared_model, selected_databases, &blk) + Gitlab::Database.database_base_models_with_gitlab_shared.each_pair do |connection_name, connection_model| + if shared_model.limit_connection_names + next unless shared_model.limit_connection_names.include?(connection_name.to_sym) + end + + next if selected_databases.present? && selected_databases.exclude?(connection_name.to_sym) + + with_shared_connection(connection_model.connection, connection_name) do + yield shared_model, connection_name + end + end + end + + def with_model_connection(model, selected_databases, &blk) + connection_name = model.connection_db_config.name + + return if selected_databases.present? && selected_databases.exclude?(connection_name.to_sym) + + with_shared_connection(model.connection, connection_name) do + yield model, connection_name + end + end + + def with_shared_connection(connection, connection_name) + Gitlab::Database::SharedModel.using_connection(connection) do + Gitlab::AppLogger.debug(message: 'Switched database connection', connection_name: connection_name) + + yield + end + end + end + end + end + end + end +end diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/postgres.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/postgres.rb new file mode 100644 index 0000000000000000000000000000000000000000..83d3002a1810eaf18d3bf5edcc53e07c86701767 --- /dev/null +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/postgres.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Gitlab + module Backup + module Cli + module Database + class Postgres + # Owner can read/write, group no permission, others no permission + FILE_PERMISSION = 0o600 + + # Triggers PgDump and outputs to the provided file path + # + # @param [String] output_file_path full path to the output destination + # @param [Gitlab::Backup::Cli::Utils::PgDump] pg_dump + # @return [Boolean] whether pg_dump finished with success + def dump(output_file_path, pg_dump) + compress_rd, compress_wr = IO.pipe + + compress_pid = spawn(compress_cmd, in: compress_rd, out: [output_file_path, 'w', FILE_PERMISSION]) + compress_rd.close + + dump_pid = pg_dump.spawn(output: compress_wr) + compress_wr.close + + [compress_pid, dump_pid].all? do |pid| + Process.waitpid(pid) + $?.success? + end + end + + private + + def compress_cmd + Utils::Compression.compression_command.cmd_args.flatten.first + end + end + end + end + end +end diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/wrapper.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/wrapper.rb new file mode 100644 index 0000000000000000000000000000000000000000..56d8f03ae15cfe1cee941cbb5678748b2a59c6b9 --- /dev/null +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/database/wrapper.rb @@ -0,0 +1,446 @@ +# frozen_string_literal: true + +module Gitlab + module Backup + module Cli + module Database + class Wrapper + attr_reader :context + + def initialize(context) + @context = context + end + + # MAIN_DATABASE_NAME = 'main' + CI_DATABASE_NAME = 'ci' + # DEFAULT_POOL_HEADROOM = 10 + + # This constant is used when renaming tables concurrently. + # If you plan to rename a table using the `rename_table_safely` method, add your table here one milestone before the rename. + # Example: + # TABLES_TO_BE_RENAMED = { + # 'old_name' => 'new_name' + # }.freeze + # TABLES_TO_BE_RENAMED = {}.freeze + + # Minimum PostgreSQL version requirement per documentation: + # https://docs.gitlab.com/ee/install/requirements.html#postgresql-requirements + # MINIMUM_POSTGRES_VERSION = 14 + + # https://www.postgresql.org/docs/9.2/static/datatype-numeric.html + # MAX_INT_VALUE = 2147483647 + # MIN_INT_VALUE = -2147483648 + + # The max value between MySQL's TIMESTAMP and PostgreSQL's timestampz: + # https://www.postgresql.org/docs/9.1/static/datatype-datetime.html + # https://dev.mysql.com/doc/refman/5.7/en/datetime.html + # FIXME: this should just be the max value of timestampz + MAX_TIMESTAMP_VALUE = Time.at((1 << 31) - 1).freeze + + # The maximum number of characters for text fields, to avoid DoS attacks via parsing huge text fields + # https://gitlab.com/gitlab-org/gitlab-foss/issues/61974 + # MAX_TEXT_SIZE_LIMIT = 1_000_000 + + # Schema we store dynamically managed partitions in (e.g. for time partitioning) + DYNAMIC_PARTITIONS_SCHEMA = :gitlab_partitions_dynamic + + # Schema we store static partitions in (e.g. for hash partitioning) + STATIC_PARTITIONS_SCHEMA = :gitlab_partitions_static + + # This is an extensive list of postgres schemas owned by GitLab + # It does not include the default public schema + # EXTRA_SCHEMAS = [DYNAMIC_PARTITIONS_SCHEMA, STATIC_PARTITIONS_SCHEMA].freeze + + # PRIMARY_DATABASE_NAME = ActiveRecord::Base.connection_db_config.name.to_sym # rubocop:disable Database/MultipleDatabases + + # FULLY_QUALIFIED_IDENTIFIER = /^\w+\.\w+$/ + + ## Database Modes + MODE_SINGLE_DATABASE = "single-database" + MODE_SINGLE_DATABASE_CI_CONNECTION = "single-database-ci-connection" + MODE_MULTIPLE_DATABASES = "multiple-databases" + + def all_database_connection_files + Dir.glob(context.gitlab_basepath.join("db/database_connections/*.yaml")) + end + + def all_gitlab_schema_files + Dir.glob(context.gitlab_basepath.join("db/gitlab_schemas/*.yaml")) + end + + def all_database_connections + @all_database_connections ||= + all_database_connection_files + .map { |file| ConnectionInfo.load_file(file) } + .sort_by(&:order) + .index_by(&:name) + .with_indifferent_access.freeze + end + + # def all_database_names + # all_database_connections.keys.map(&:to_s) + # end + + def all_gitlab_schemas + @all_gitlab_schemas ||= + all_gitlab_schema_files + .map { |file| GitlabSchemaInfo.load_file(file) } + .index_by(&:name) + .with_indifferent_access.freeze + end + + def database_base_models + # Note that we use ActiveRecord::Base here and not ApplicationRecord. + # This is deliberate, as we also use these classes to apply load + # balancing to, and the load balancer must be enabled for _all_ models + # that inherit from ActiveRecord::Base; not just our own models that + # inherit from ApplicationRecord. + @database_base_models ||= + all_database_connections + .transform_values(&:connection_class) + .compact.with_indifferent_access.freeze + end + + # This returns a list of databases that contains all the gitlab_shared schema + # tables. + def database_base_models_with_gitlab_shared + @database_base_models_with_gitlab_shared ||= + all_database_connections + .select { |_, db| db.has_gitlab_shared? } + .transform_values(&:connection_class) + .compact.with_indifferent_access.freeze + end + + # # This returns a list of databases whose connection supports database load + # # balancing. We can't reuse the database_base_models since not all connections + # # do support load balancing. + # def database_base_models_using_load_balancing + # @database_base_models_using_load_balancing ||= + # all_database_connections + # .select { |_, db| db.uses_load_balancing? } + # .transform_values(&:connection_class) + # .compact.with_indifferent_access.freeze + # end + + # This returns a list of base models with connection associated for a given gitlab_schema + def schemas_to_base_models + @schemas_to_base_models ||= + all_gitlab_schemas.transform_values do |schema| + all_database_connections + .values + .select { |db| db.gitlab_schemas.include?(schema.name) } + .filter_map { |db| db.connection_class_or_fallback(all_database_connections) } + .uniq + end.compact.with_indifferent_access.freeze + end + + # We configure the database connection pool size automatically based on the + # configured concurrency. We also add some headroom, to make sure we don't + # run out of connections when more threads besides the 'user-facing' ones + # are running. + # + # Read more about this in + # doc/development/database/client_side_connection_pool.md + # def default_pool_size + # headroom = + # (ENV["DB_POOL_HEADROOM"].presence || DEFAULT_POOL_HEADROOM).to_i + + # Gitlab::Runtime.max_threads + headroom + # end + + # Expose path information so that we can use it to make sure migrations are + # healthy + def upgrade_path + path_data = YAML.safe_load_file(context.gitlab_basepath.join('config/upgrade_path.yml')) + Gitlab::Utils::UpgradePath.new(path_data, Gitlab.version_info) + end + + # Migrations before this version may have been removed. + # def min_schema_gitlab_version + # upgrade_path.last_required_stop + # end + + # Database configured. Returns true even if the database is shared + def has_config?(database_name) + true + # TODO + # ActiveRecord::Base.configurations + # .configs_for(env_name: context.env, name: database_name.to_s, include_hidden: true) + # .present? + end + + # Database configured. Returns false if the database is shared + def has_database?(database_name) + db_config = database_base_models[database_name]&.connection_db_config + db_config.present? && db_config_share_with(db_config).nil? + end + + def multiple_databases? + database_mode == MODE_MULTIPLE_DATABASES + end + + def database_mode + if !has_config?(CI_DATABASE_NAME) + MODE_SINGLE_DATABASE + elsif has_database?(CI_DATABASE_NAME) + MODE_MULTIPLE_DATABASES + else + MODE_SINGLE_DATABASE_CI_CONNECTION + end + end + + # class PgUser < ApplicationRecord + # table_name = 'pg_user' + # primary_key = :usename + # end + + # rubocop: disable CodeReuse/ActiveRecord + # def check_for_non_superuser + # user = PgUser.find_by('usename = CURRENT_USER') + # am_i_superuser = user.usesuper + + # Gitlab::AppLogger.info( + # "Account details: User: \"#{user.usename}\", UseSuper: (#{am_i_superuser})" + # ) + + # raise 'Error: detected superuser' if am_i_superuser + # rescue ActiveRecord::StatementInvalid + # raise 'User CURRENT_USER not found' + # end + # rubocop: enable CodeReuse/ActiveRecord + + # def random + # "RANDOM()" + # end + + # def true_value + # "'t'" + # end + + # def false_value + # "'f'" + # end + + # def sanitize_timestamp(timestamp) + # MAX_TIMESTAMP_VALUE > timestamp ? timestamp : MAX_TIMESTAMP_VALUE.dup + # end + + # def all_uncached(&block) + # # Calls to #uncached only disable caching for the current connection. Since the load balancer + # # can potentially upgrade from read to read-write mode (using a different connection), we specify + # # up-front that we'll explicitly use the primary for the duration of the operation. + # Gitlab::Database::LoadBalancing::Session.current.use_primary do + # base_models = database_base_models_using_load_balancing.values + # base_models.reduce(block) { |blk, model| -> { model.uncached(&blk) } }.call + # end + # end + + # def allow_cross_joins_across_databases(url:) + # # this method is implemented in: + # # spec/support/database/prevent_cross_joins.rb + # yield + # end + + # def add_post_migrate_path_to_rails(force: false) + # return if ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS'] && !force + + # Rails.application.config.paths['db'].each do |db_path| + # path = context.gitlab_basepath.join(db_path, 'post_migrate').to_s + + # next if Rails.application.config.paths['db/migrate'].include? path + + # Rails.application.config.paths['db/migrate'] << path + + # # Rails memoizes migrations at certain points where it won't read the above + # # path just yet. As such we must also update the following list of paths. + # ActiveRecord::Migrator.migrations_paths << path + # end + # end + + # def db_config_names(with_schema:) + # db_config_names = ::ActiveRecord::Base.configurations + # .configs_for(env_name: Rails.env).map(&:name) + # return db_config_names unless with_schema + + # schema_models = schemas_to_base_models.fetch(with_schema) + # db_config_names.select do |db_config_name| + # db_info = all_database_connections.fetch(db_config_name) + # schema_models.include?(db_info.connection_class) + # end + # end + + # This returns all matching schemas that a given connection can use + # Since the `ActiveRecord::Base` might change the connection (from main to ci) + # This does not look at literal connection names, but rather compares + # models that are holders for a given db_config_name + # def gitlab_schemas_for_connection(connection) + # db_config = db_config_for_connection(connection) + + # # connection might not be yet adopted (returning NullPool, and no connection_klass) + # # in such cases it is fine to ignore such connections + # return unless db_config + + # db_config_name = db_config.name.delete_suffix(LoadBalancing::LoadBalancer::REPLICA_SUFFIX) + # primary_model = database_base_models.fetch(db_config_name.to_sym) + + # schemas_to_base_models.select do |_, child_models| + # child_models.any? do |child_model| + # child_model == primary_model || \ + # # The model might indicate a child connection, ensure that this is enclosed in a `db_config` + # database_base_models[db_config_share_with(child_model.connection_db_config)] == primary_model + # end + # end.keys.map!(&:to_sym) + # end + + def db_config_for_connection(connection) + return unless connection + + # For a ConnectionProxy we want to avoid ambiguous db_config as it may + # sometimes default to replica so we always return the primary config + # instead. + # if connection.is_a?(::Gitlab::Database::LoadBalancing::ConnectionProxy) + # return connection.load_balancer.configuration.db_config + # end + + # During application init we might receive `NullPool` + return unless connection.respond_to?(:pool) && + connection.pool.respond_to?(:db_config) + + db_config = connection.pool.db_config + db_config unless empty_config?(db_config) + end + + def empty_config?(db_config) + return true unless db_config + + db_config.is_a?(ActiveRecord::ConnectionAdapters::NullPool::NullConfig) + end + + # At the moment, the connection can only be retrieved by + # Gitlab::Database::LoadBalancer#read or #read_write or from the + # ActiveRecord directly. Therefore, if the load balancer doesn't + # recognize the connection, this method returns the primary role + # directly. In future, we may need to check for other sources. + # Expected returned names: + # main, main_replica, ci, ci_replica, unknown + # def db_config_name(connection) + # db_config = db_config_for_connection(connection) + # db_config&.name || 'unknown' + # end + + # def db_config_database(connection) + # db_config = db_config_for_connection(connection) + # db_config&.database || 'unknown' + # end + + # If the `database_tasks: false` is being used, + # return the expected fallback database for this database configuration + def db_config_share_with(db_config) + # no sharing + return if db_config.database_tasks? + + database_connection_info = all_database_connections[db_config.name] + + if database_connection_info + database_connection_info.fallback_database&.to_s + else + # legacy behaviour + 'main' + end + end + + def read_only? + false + end + + # def read_write? + # !read_only? + # end + + # Determines minimum viable migration version, determined by the timestamp + # of the earliest migration file. + # def read_minimum_migration_version + # Dir.open( + # context.gitlab_basepath.join('db/migrate') + # ).filter_map { |f| /\A\d{14}/.match(f)&.to_s }.map(&:to_i).min + # end + + # Monkeypatch rails with upgraded database observability + # def install_transaction_metrics_patches! + # ActiveRecord::Base.prepend(ActiveRecordBaseTransactionMetrics) + # end + + # def install_transaction_context_patches! + # ActiveRecord::ConnectionAdapters::TransactionManager + # .prepend(TransactionManagerContext) + # ActiveRecord::ConnectionAdapters::RealTransaction + # .prepend(RealTransactionContext) + # end + + # MonkeyPatch for ActiveRecord::Base for adding observability + module ActiveRecordBaseTransactionMetrics + extend ActiveSupport::Concern + + class_methods do + # A patch over ApplicationRecord.transaction that provides + # observability into transactional methods. + # def transaction(**options, &block) + # transaction_type = get_transaction_type(connection.transaction_open?, options[:requires_new]) + + # ::Gitlab::Database::Metrics.subtransactions_increment(name) if transaction_type == :sub_transaction + + # if ::Gitlab.next_rails? + # super(**options, &block) + # else + # payload = { connection: connection, transaction_type: transaction_type } + + # ActiveSupport::Notifications.instrument('transaction.active_record', payload) do + # super(**options, &block) + # end + # end + # end + + private + + def get_transaction_type(transaction_open, requires_new_flag) + if transaction_open + return :sub_transaction if requires_new_flag + + return :fake_transaction + end + + :real_transaction + end + end + end + + # rubocop:disable Gitlab/ModuleWithInstanceVariables + module TransactionManagerContext + # def transaction_context + # @stack.first.try(:gitlab_transaction_context) + # end + end + + module RealTransactionContext + def gitlab_transaction_context + @gitlab_transaction_context ||= ::Gitlab::Database::Transaction::Context.new + end + + def commit + gitlab_transaction_context.commit + + super + end + + def rollback + gitlab_transaction_context.rollback + + super + end + end + # rubocop:enable Gitlab/ModuleWithInstanceVariables + end + end + end + end +end diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/targets/database.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/targets/database.rb index 58493c9b7dc1a17649b6993ea88fa887cdabee43..a658ea173b4633c303c46fa7fa5adfbe5cdc9605 100644 --- a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/targets/database.rb +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/targets/database.rb @@ -5,8 +5,6 @@ module Backup module Cli module Targets class Database < Target - attr_reader :errors - IGNORED_ERRORS = [ # Ignore warnings /WARNING:/, @@ -17,8 +15,14 @@ class Database < Target ].freeze IGNORED_ERRORS_REGEXP = Regexp.union(IGNORED_ERRORS).freeze - def initialize + attr_reader :errors + attr_reader :database_wrapper + + def initialize(context) + super(context) + @errors = [] + @database_wrapper = ::Gitlab::Backup::Cli::Database::Wrapper.new(context) end def dump(destination_dir) @@ -36,9 +40,9 @@ def dump(destination_dir) schemas = [] - if Gitlab.config.backup.pg_schema - schemas << Gitlab.config.backup.pg_schema - schemas.push(*Gitlab::Database::EXTRA_SCHEMAS.map(&:to_s)) + if context.backup_pg_schema + schemas << context.backup_pg_schema + schemas.push(*databse::EXTRA_SCHEMAS.map(&:to_s)) end pg_dump = ::Gitlab::Backup::Cli::Utils::PgDump.new( @@ -47,7 +51,8 @@ def dump(destination_dir) schemas: schemas, env: pg_env) - success = ::Backup::Dump::Postgres.new.dump(dump_file_name, pg_dump) + # TODO + success = ::Gitlab::Backup::Cli::Database::Postgres.new.dump(dump_file_name, pg_dump) backup_connection.release_snapshot! if backup_connection.snapshot_id @@ -56,11 +61,11 @@ def dump(destination_dir) report_finish_status(success) end ensure - if multiple_databases? - ::Gitlab::Database::EachDatabase.each_connection( + if database_wrapper.multiple_databases? + ::Gitlab::Backup::Cli::Database::EachDatabase.each_connection( only: base_models_for_backup.keys, include_shared: false ) do |_, database_connection_name| - backup_connection = ::Backup::DatabaseConnection.new(database_connection_name) + backup_connection = ::Gitlab::Backup::Cli::Database::Connection.new(database_connection_name, database_wrapper) backup_connection.restore_timeouts! rescue ActiveRecord::ConnectionNotEstablished raise DatabaseBackupError.new( @@ -73,7 +78,7 @@ def dump(destination_dir) def restore(destination_dir) base_models_for_backup.each do |database_name, _| - backup_connection = ::Backup::DatabaseConnection.new(database_name) + backup_connection = ::Gitlab::Backup::Cli::Database::Connection.new(database_name, database_wrapper) config = backup_connection.database_configuration.activerecord_variables @@ -125,7 +130,7 @@ def restore(destination_dir) protected def base_models_for_backup - @base_models_for_backup ||= ::Gitlab::Database.database_base_models_with_gitlab_shared + @base_models_for_backup ||= database_wrapper.database_base_models_with_gitlab_shared end def main_database?(database_name) @@ -151,6 +156,7 @@ def report_finish_status(status) def drop_tables(database_name) Gitlab::Backup::Cli::Output.info 'Cleaning the database ... ' + # TODO if Rake::Task.task_defined? "gitlab:db:drop_tables:#{database_name}" Rake::Task["gitlab:db:drop_tables:#{database_name}"].invoke else @@ -181,13 +187,13 @@ def each_database(destination_dir, &block) # and reject the ones that are shared, so we don't get duplicates # # we consider a connection to be shared when it has `database_tasks: false` - ::Gitlab::Database::EachDatabase.each_connection( + ::Gitlab::Backup::Cli::Database::EachDatabase.each_connection( only: base_models_for_backup.keys, include_shared: false ) do |_, database_connection_name| - backup_connection = ::Backup::DatabaseConnection.new(database_connection_name) + backup_connection = ::Gitlab::Backup::Cli::Database::Connection.new(database_connection_name, database_wrapper) databases << backup_connection - next unless multiple_databases? + next unless database_wrapper.multiple_databases? begin # Trigger a transaction snapshot export that will be used by pg_dump later on @@ -202,10 +208,6 @@ def each_database(destination_dir, &block) databases.each(&block) end - - def multiple_databases? - ::Gitlab::Database.database_mode == ::Gitlab::Database::MODE_MULTIPLE_DATABASES - end end end end diff --git a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/tasks/database.rb b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/tasks/database.rb index 5c4e94326c9f06c7659439a339fa2450d175c9fb..5a772e9355448e59f353d740f3160eb1a3b2f16f 100644 --- a/gems/gitlab-backup-cli/lib/gitlab/backup/cli/tasks/database.rb +++ b/gems/gitlab-backup-cli/lib/gitlab/backup/cli/tasks/database.rb @@ -16,7 +16,7 @@ def cleanup_path = 'db' private def target - ::Gitlab::Backup::Cli::Targets::Database.new + ::Gitlab::Backup::Cli::Targets::Database.new(context) end end end diff --git a/gems/gitlab-backup-cli/spec/gitlab/backup/cli/targets/database_spec.rb b/gems/gitlab-backup-cli/spec/gitlab/backup/cli/targets/database_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..bb667ac530466f20ad75d2f5adcc2d079644be23 --- /dev/null +++ b/gems/gitlab-backup-cli/spec/gitlab/backup/cli/targets/database_spec.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Backup::Cli::Targets::Database do + let(:context) { Gitlab::Backup::Cli::Context.build } + let(:database) { described_class.new(context) } + + describe '#dump' do + let(:destination) { '/path/to/destination' } + # let(:backup_connection) { double('backup_connection') } + # let(:database_configuration) { double('database_configuration') } + let(:pg_env_variables) { { 'PGHOST' => 'localhost' } } + let(:activerecord_variables) { { database: 'gitlabhq_production' } } + + before do + # allow(backup_connection).to receive(:database_configuration).and_return(database_configuration) + # allow(database_configuration).to receive(:pg_env_variables).and_return(pg_env_variables) + # allow(database_configuration).to receive(:activerecord_variables).and_return(activerecord_variables) + allow(Gitlab::Backup::Cli::Database::EachDatabase).to receive(:each_connection).and_yield(nil, 'main') + # allow(Gitlab::Backup::Cli::Database::Connection).to receive(:new).with('main').and_return(backup_connection) + end + + it 'creates the destination directory' do + expect(FileUtils).to receive(:mkdir_p).with(destination) + database.dump(destination) + end + + # it 'dumps the database' do + # dump_double = double('dump') + # expect(Gitlab::Backup::Cli::Database::Postgres).to receive(:new).and_return(dump_double) + # expect(dump_double).to receive(:dump) + # database.dump(destination) + # end + + # it 'releases the snapshot after dumping' do + # expect(backup_connection).to receive(:release_snapshot!) + # database.dump(destination) + # end + + # it 'raises an error if the dump fails' do + # allow(Gitlab::Backup::Cli::Database::Postgres).to receive(:new).and_return(double('dump', dump: false)) + # expect { database.dump(destination) }.to raise_error(Gitlab::Backup::Cli::Errors::DatabaseBackupError) + # end + end + + # describe '#restore' do + # let(:source) { '/path/to/source' } + # let(:backup_connection) { double('backup_connection') } + # let(:database_configuration) { double('database_configuration') } + # let(:pg_env_variables) { { 'PGHOST' => 'localhost' } } + # let(:activerecord_variables) { { database: 'gitlabhq_production' } } + + # before do + # allow(backup_connection).to receive(:database_configuration).and_return(database_configuration) + # allow(database_configuration).to receive(:pg_env_variables).and_return(pg_env_variables) + # allow(database_configuration).to receive(:activerecord_variables).and_return(activerecord_variables) + # allow(Gitlab::Backup::Cli::Database::Connection).to receive(:new).with(:main).and_return(backup_connection) + # end + + # it 'drops all tables before restoring' do + # expect(database).to receive(:drop_tables).with(:main) + # database.restore(source) + # end + + # it 'restores the database' do + # allow(File).to receive(:exist?).and_return(true) + # expect(database).to receive(:with_transient_pg_env).and_yield + # database.restore(source) + # end + + # it 'raises an error if the database file does not exist' do + # allow(File).to receive(:exist?).and_return(false) + # expect { database.restore(source) }.to raise_error(Gitlab::Backup::Cli::Errors::DatabaseBackupError) + # end + # end +end