diff --git a/ee/spec/lib/ee/backup/repositories_spec.rb b/ee/spec/lib/ee/backup/repositories_spec.rb index 4750e3dde8c83b6ed6562a42ac8c91fa31af65ab..45066a4a56c81fe4daf2a24266bc236b64d7958e 100644 --- a/ee/spec/lib/ee/backup/repositories_spec.rb +++ b/ee/spec/lib/ee/backup/repositories_spec.rb @@ -9,8 +9,11 @@ let(:paths) { [] } let(:destination) { 'repositories' } let(:backup_id) { 'backup_id' } + let(:backup_options) { Backup::Options.new } - subject { described_class.new(progress, strategy: strategy, storages: storages, paths: paths) } + subject do + described_class.new(progress, strategy: strategy, options: backup_options, storages: storages, paths: paths) + end describe '#dump' do let_it_be(:project) { create(:project, :repository) } diff --git a/lib/backup/database.rb b/lib/backup/database.rb index a0eaccb1ca4f7f8a4ce4a912c08647ee5d38ad20..962214407d9f3e3f42acdff768cdff226d602407 100644 --- a/lib/backup/database.rb +++ b/lib/backup/database.rb @@ -18,8 +18,8 @@ class Database < Task ].freeze IGNORED_ERRORS_REGEXP = Regexp.union(IGNORED_ERRORS).freeze - def initialize(progress, force:) - super(progress) + def initialize(progress, options:, force:) + super(progress, options: options) @force = force end diff --git a/lib/backup/files.rb b/lib/backup/files.rb index e3a8290e2e3aac0eec94211f3e9d92be41314e00..adf9a081ad58d0c8468d13fd3286857cc4b9d83b 100644 --- a/lib/backup/files.rb +++ b/lib/backup/files.rb @@ -11,8 +11,8 @@ class Files < Task attr_reader :excludes - def initialize(progress, app_files_dir, excludes: []) - super(progress) + def initialize(progress, app_files_dir, options:, excludes: []) + super(progress, options: options) @app_files_dir = app_files_dir @excludes = [DEFAULT_EXCLUDE].concat(excludes) diff --git a/lib/backup/manager.rb b/lib/backup/manager.rb index 1c53e675b2abaefda1ac2827f3316923533681de..2b3bc85ae6d06fbdbc1b6cffce8a7bd241f52c92 100644 --- a/lib/backup/manager.rb +++ b/lib/backup/manager.rb @@ -9,17 +9,6 @@ class Manager # if some of 
these files are still there, we don't need them in the backup LEGACY_PAGES_TMP_PATH = '@pages.tmp' - LIST_ENVS = { - skipped: 'SKIP', - repositories_storages: 'REPOSITORIES_STORAGES', - repositories_paths: 'REPOSITORIES_PATHS', - skip_repositories_paths: 'SKIP_REPOSITORIES_PATHS' - }.freeze - - YAML_PERMITTED_CLASSES = [ - ActiveSupport::TimeWithZone, ActiveSupport::TimeZone, Symbol, Time - ].freeze - TaskDefinition = Struct.new( :enabled, # `true` if the task can be used. Treated as `true` when not specified. :human_name, # Name of the task used for logging. @@ -34,16 +23,22 @@ def enabled? end end - attr_reader :progress + attr_reader :progress, :remote_storage, :options def initialize(progress, definitions: nil) @progress = progress - @incremental = Gitlab::Utils.to_boolean(ENV['INCREMENTAL'], default: false) @definitions = definitions + @options = Backup::Options.new + @metadata = Backup::Metadata.new(manifest_filepath) + @options.extract_from_env! # preserve existing behavior + @remote_storage = Backup::RemoteStorage.new(progress: progress, options: options) end def create - unpack(ENV.fetch('PREVIOUS_BACKUP', ENV['BACKUP'])) if incremental? + # Deprecation: Using backup_id (ENV['BACKUP']) to specify previous backup was deprecated in 15.0 + previous_backup = options.previous_backup || options.backup_id + + unpack(previous_backup) if options.incremental? run_all_create_tasks puts_time "Warning: Your gitlab.rb and gitlab-secrets.json files contain sensitive data \n" \ @@ -56,6 +51,7 @@ def run_create_task(task_name) build_backup_information definition = definitions[task_name] + destination_dir = File.join(Gitlab.config.backup.path, definition.destination_path) unless definition.enabled? puts_time "Dumping #{definition.human_name} ... ".color(:blue) + "[DISABLED]".color(:cyan) @@ -68,7 +64,7 @@ def run_create_task(task_name) end puts_time "Dumping #{definition.human_name} ... 
".color(:blue) - definition.task.dump(File.join(Gitlab.config.backup.path, definition.destination_path), backup_id) + definition.task.dump(destination_dir, backup_id) puts_time "Dumping #{definition.human_name} ... ".color(:blue) + "done".color(:green) rescue Backup::DatabaseBackupError, Backup::FileBackupError => e @@ -76,7 +72,7 @@ def run_create_task(task_name) end def restore - unpack(ENV['BACKUP']) + unpack(options.backup_id) run_all_restore_tasks puts_time "Warning: Your gitlab.rb and gitlab-secrets.json files contain sensitive data \n" \ @@ -187,35 +183,32 @@ def build_definitions # rubocop:disable Metrics/AbcSize end def build_db_task - force = Gitlab::Utils.to_boolean(ENV['force'], default: false) - - Database.new(progress, force: force) + Database.new(progress, options: options, force: options.force?) end def build_repositories_task - max_concurrency = ENV['GITLAB_BACKUP_MAX_CONCURRENCY'].presence&.to_i - max_storage_concurrency = ENV['GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY'].presence&.to_i strategy = Backup::GitalyBackup.new(progress, - incremental: incremental?, - max_parallelism: max_concurrency, - storage_parallelism: max_storage_concurrency, + incremental: options.incremental?, + max_parallelism: options.max_parallelism, + storage_parallelism: options.max_storage_parallelism, server_side: backup_information[:repositories_server_side] ) Repositories.new(progress, - strategy: strategy, - storages: list_env(:repositories_storages), - paths: list_env(:repositories_paths), - skip_paths: list_env(:skip_repositories_paths) - ) + strategy: strategy, + options: options, + storages: options.repositories_storages, + paths: options.repositories_paths, + skip_paths: options.skip_repositories_paths + ) end def build_files_task(app_files_dir, excludes: []) - Files.new(progress, app_files_dir, excludes: excludes) + Files.new(progress, app_files_dir, options: options, excludes: excludes) end def run_all_create_tasks - if incremental? + if options.incremental? 
read_backup_information verify_backup_version update_backup_information @@ -229,14 +222,14 @@ def run_all_create_tasks write_backup_information - unless skipped?('tar') + unless options.skippable_operations.archive pack upload remove_old end ensure - cleanup unless skipped?('tar') + cleanup unless options.skippable_operations.archive remove_tmp end @@ -254,67 +247,56 @@ def run_all_restore_tasks Rake::Task['cache:clear'].invoke ensure - cleanup unless skipped?('tar') + cleanup unless options.skippable_operations.archive remove_tmp end - def incremental? - @incremental - end - def read_backup_information - @backup_information ||= YAML.safe_load_file( - File.join(backup_path, MANIFEST_NAME), - permitted_classes: YAML_PERMITTED_CLASSES) + @metadata.load! + + options.update_from_backup_information!(backup_information) end def write_backup_information - # Make sure there is a connection - ::Gitlab::Database.database_base_models.each_value do |base_model| - base_model.connection.reconnect! - end - - Dir.chdir(backup_path) do - File.open("#{backup_path}/#{MANIFEST_NAME}", "w+") do |file| - file << backup_information.to_yaml.gsub(/^---\n/, '') - end - end + @metadata.save! 
end def build_backup_information - @backup_information ||= { + return if @metadata.backup_information + + @metadata.update( db_version: ActiveRecord::Migrator.current_version.to_s, backup_created_at: Time.current, gitlab_version: Gitlab::VERSION, tar_version: tar_version, installation_type: Gitlab::INSTALLATION_TYPE, - skipped: ENV['SKIP'], - repositories_storages: ENV['REPOSITORIES_STORAGES'], - repositories_paths: ENV['REPOSITORIES_PATHS'], - skip_repositories_paths: ENV['SKIP_REPOSITORIES_PATHS'], - repositories_server_side: Gitlab::Utils.to_boolean(ENV['REPOSITORIES_SERVER_SIDE'], default: false) - } + skipped: options.serialize_skippables, + repositories_storages: options.repositories_storages.join(','), + repositories_paths: options.repositories_paths.join(','), + skip_repositories_paths: options.skip_repositories_paths.join(','), + repositories_server_side: options.repositories_server_side_backup + ) end def update_backup_information - @backup_information.merge!( + @metadata.update( full_backup_id: full_backup_id, db_version: ActiveRecord::Migrator.current_version.to_s, backup_created_at: Time.current, gitlab_version: Gitlab::VERSION, tar_version: tar_version, installation_type: Gitlab::INSTALLATION_TYPE, - skipped: list_env(:skipped).join(','), - repositories_storages: list_env(:repositories_storages).join(','), - repositories_paths: list_env(:repositories_paths).join(','), - skip_repositories_paths: list_env(:skip_repositories_paths).join(',') + skipped: options.serialize_skippables, + repositories_storages: options.repositories_storages.join(','), + repositories_paths: options.repositories_paths.join(','), + skip_repositories_paths: options.skip_repositories_paths.join(',') ) end def backup_information - raise Backup::Error, "#{MANIFEST_NAME} not yet loaded" unless @backup_information + raise Backup::Error, "#{MANIFEST_NAME} not yet loaded" unless @metadata.backup_information - @backup_information + @metadata.backup_information end def pack @@ -333,27 
+315,7 @@ def pack end def upload - connection_settings = Gitlab.config.backup.upload.connection - if connection_settings.blank? || skipped?('remote') || skipped?('tar') - puts_time "Uploading backup archive to remote storage #{remote_directory} ... ".color(:blue) + "[SKIPPED]".color(:cyan) - return - end - - puts_time "Uploading backup archive to remote storage #{remote_directory} ... ".color(:blue) - - directory = connect_to_remote_directory - upload = directory.files.create(create_attributes) - - if upload - if upload.respond_to?(:encryption) && upload.encryption - puts_time "Uploading backup archive to remote storage #{remote_directory} ... ".color(:blue) + "done (encrypted with #{upload.encryption})".color(:green) - else - puts_time "Uploading backup archive to remote storage #{remote_directory} ... ".color(:blue) + "done".color(:green) - end - else - puts_time "Uploading backup to #{remote_directory} failed".color(:red) - raise Backup::Error, 'Backup failed' - end + remote_storage.upload(backup_information: backup_information) end def cleanup @@ -460,7 +422,7 @@ def unpack(source_backup_id) # print list of available backups puts_available_timestamps - if incremental? + if options.incremental? puts_time 'Please specify which one you want to create an incremental backup for:' puts_time 'rake gitlab:backup:create INCREMENTAL=true PREVIOUS_BACKUP=timestamp_of_backup' else @@ -499,19 +461,7 @@ def tar_version end def skipped?(item) - skipped.include?(item) - end - - def skipped - @skipped ||= list_env(:skipped) - end - - def list_env(name) - list = ENV.fetch(LIST_ENVS[name], '').split(',') - list += backup_information[name].split(',') if backup_information[name] - list.uniq! - list.compact! - list + options.skippable_tasks[item] end def enabled_task?(task_name) @@ -523,7 +473,11 @@ def backup_file?(file) end def non_tarred_backup? 
- File.exist?(File.join(backup_path, MANIFEST_NAME)) + File.exist?(manifest_filepath) + end + + def manifest_filepath + File.join(backup_path, MANIFEST_NAME) end def backup_path @@ -538,35 +492,6 @@ def available_timestamps @backup_file_list.map { |item| item.gsub("#{FILE_NAME_SUFFIX}", "") } end - def object_storage_config - @object_storage_config ||= ObjectStorage::Config.new(Gitlab.config.backup.upload) - end - - def connect_to_remote_directory - connection = ::Fog::Storage.new(object_storage_config.credentials) - - # We only attempt to create the directory for local backups. For AWS - # and other cloud providers, we cannot guarantee the user will have - # permission to create the bucket. - if connection.service == ::Fog::Storage::Local - connection.directories.create(key: remote_directory) - else - connection.directories.new(key: remote_directory) - end - end - - def remote_directory - Gitlab.config.backup.upload.remote_directory - end - - def remote_target - if ENV['DIRECTORY'] - File.join(ENV['DIRECTORY'], tar_file) - else - tar_file - end - end - def backup_contents [MANIFEST_NAME] + definitions.reject do |name, definition| skipped?(name) || !enabled_task?(name) || @@ -580,52 +505,19 @@ def tar_file def full_backup_id full_backup_id = backup_information[:full_backup_id] - full_backup_id ||= File.basename(ENV['PREVIOUS_BACKUP']) if ENV['PREVIOUS_BACKUP'].present? + full_backup_id ||= File.basename(options.previous_backup) if options.previous_backup.present? full_backup_id ||= backup_id full_backup_id end def backup_id - if ENV['BACKUP'].present? - File.basename(ENV['BACKUP']) + if options.backup_id.present? 
+ File.basename(options.backup_id) else "#{backup_information[:backup_created_at].strftime('%s_%Y_%m_%d_')}#{backup_information[:gitlab_version]}" end end - def create_attributes - attrs = { - key: remote_target, - body: File.open(File.join(backup_path, tar_file)), - multipart_chunk_size: Gitlab.config.backup.upload.multipart_chunk_size, - storage_class: Gitlab.config.backup.upload.storage_class - }.merge(encryption_attributes) - - # Google bucket-only policies prevent setting an ACL. In any case, by default, - # all objects are set to the default ACL, which is project-private: - # https://cloud.google.com/storage/docs/json_api/v1/defaultObjectAccessControls - attrs[:public] = false unless google_provider? - - attrs - end - - def encryption_attributes - return object_storage_config.fog_attributes if object_storage_config.aws_server_side_encryption_enabled? - - # Use customer-managed keys. Also, this preserves - # backward-compatibility for existing usages of `SSE-S3` that - # don't set `backup.upload.storage_options.server_side_encryption` - # to `'AES256'`. - { - encryption_key: Gitlab.config.backup.upload.encryption_key, - encryption: Gitlab.config.backup.upload.encryption - } - end - - def google_provider? 
- Gitlab.config.backup.upload.connection&.provider&.downcase == 'google' - end - def puts_time(msg) progress.puts "#{Time.current} -- #{msg}" Gitlab::BackupLogger.info(message: "#{Rainbow.uncolor(msg)}") diff --git a/lib/backup/metadata.rb b/lib/backup/metadata.rb new file mode 100644 index 0000000000000000000000000000000000000000..aaee326058baf017e17af40c219c584e91845d96 --- /dev/null +++ b/lib/backup/metadata.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +module Backup + class Metadata + # Fullpath for the manifest file + # @return [Pathname] full path for the manifest file + attr_reader :manifest_filepath + + # Information present in the manifest file shipped along with the backup + # @return [BackupInformation] + attr_reader :backup_information + + YAML_PERMITTED_CLASSES = [ + ActiveSupport::TimeWithZone, ActiveSupport::TimeZone, Symbol, Time + ].freeze + + # Backup Manifest content, describing what the backup contains and the environment in which it was created + # Includes versions information, timestamp, installation type and other data required to restore or to + # keep incremental backups working + BackupInformation = Struct.new( + :db_version, # ActiveRecord::Migrator.current_version.to_s, + :backup_created_at, # Time.current, + :gitlab_version, # Gitlab::VERSION, + :tar_version, # tar_version, + :installation_type, # Gitlab::INSTALLATION_TYPE, + :skipped, # ENV['SKIP'] + :repositories_storages, # ENV['REPOSITORIES_STORAGES'], + :repositories_paths, # ENV['REPOSITORIES_PATHS'], + :skip_repositories_paths, # ENV['SKIP_REPOSITORIES_PATHS'], + :repositories_server_side, # Gitlab::Utils.to_boolean(ENV['REPOSITORIES_SERVER_SIDE'], default: false) + :full_backup_id, # full_backup_id, + keyword_init: true + ) + + def initialize(manifest_filepath) + @manifest_filepath = Pathname.new(manifest_filepath) + end + + # Load #BackupInformation from a YAML manifest file on disk + def load! + return @backup_information unless @backup_information.nil? 
+ + manifest_data = load_from_file + + @backup_information = BackupInformation.new(**manifest_data) + end + + # Save content from #BackupInformation into a manifest YAML file on disk + def save! + Dir.chdir(File.dirname(manifest_filepath)) do + File.open(manifest_filepath, 'w+') do |file| + file << backup_information.to_h.to_yaml.gsub(/^---\n/, '') + end + end + end + + # Update backup information with provided data + # + # @param [Hash] data arguments matching #BackupInformation keyword arguments + def update(**data) + @backup_information ||= BackupInformation.new + + data.each_pair do |key, value| + backup_information[key] = value + end + end + + private + + def load_from_file + YAML.safe_load_file( + manifest_filepath, + permitted_classes: YAML_PERMITTED_CLASSES) + end + end +end diff --git a/lib/backup/options.rb b/lib/backup/options.rb new file mode 100644 index 0000000000000000000000000000000000000000..f0747594fe3e845155dd90f92717636b4ddc92a0 --- /dev/null +++ b/lib/backup/options.rb @@ -0,0 +1,266 @@ +# frozen_string_literal: true + +module Backup + # Backup options provided by the command line interface + class Options + # SkippableTasks store which tasks can be skipped + # Setting any one to true, will create or restore a backup without that data + # @example Skipping database content and CI job artifacts + # SkippableTasks.new(db: true, artifacts: true) + SkippableTasks = Struct.new( + :db, # Database content (PostgreSQL) + :uploads, # Attachments + :builds, # CI job output logs + :artifacts, # CI job artifacts + :lfs, # LFS objects + :terraform_state, # Terraform states + :registry, # Container registry images + :pages, # GitLab Pages content + :repositories, # Repositories + :packages, # Packages + :ci_secure_files, # Project-level Secure Files + keyword_init: true + ) + + # What operations can be skipped + SkippableOperations = Struct.new( + :archive, # whether to skip .tar step + :remote_storage, # whether to skip uploading to remote storage + 
keyword_init: true + ) + + CompressionOptions = Struct.new( + :compression_cmd, # custom compression command + :decompression_cmd, # custom decompression command + keyword_init: true + ) + + # Backup ID is the backup filename portion without extensions + # When this option is not provided, the backup name will be based on date, timestamp and gitlab version + # + # @return [String|Nil] backup id that is used as part of filename + attr_accessor :backup_id + + # Reference to previous backup full path + # + # @return [String|Nil] previous backup full path + attr_accessor :previous_backup + + # Run incremental backup? + # + # @return [Boolean] whether to run an incremental backup + attr_accessor :incremental + alias_method :incremental?, :incremental + + # Whether to bypass warnings when performing dangerous operations + # This is currently being used for the database restore task only + # + # @return [Boolean] whether to bypass warnings and perform dangerous operations + attr_accessor :force + alias_method :force?, :force + + # A list of all tasks and whether they can be skipped or not + # + # @return [SkippableTasks] + attr_accessor :skippable_tasks + + # A list of all operations and whether they can be skipped or not + # + # @return [SkippableOperations] + attr_accessor :skippable_operations + + # When using multiple repository storages, repositories can be backed up and restored in parallel + # This option allows to customize the overall limit. + # + # This is only used by repository backup and restore steps (GitalyBackup) + # + # @return [Integer|Nil] limit of backup or restore operations to happen in parallel overall + attr_accessor :max_parallelism + + # When using multiple repository storages, repositories can be backed up and restored in parallel + # This option allows to customize the limit per storage. 
+ # + # This is only used by repository backup and restore steps (GitalyBackup) + # + # @return [Integer|Nil] limit of backup or restore operations to happen in parallel per storage + attr_accessor :max_storage_parallelism + + # When using multiple repository storages, repositories from specific storages can be backed up + # separately by running the backup operation while setting this option + # + # @return [Array<String>] a list of repository storages to be backed up + attr_accessor :repositories_storages + + # In order to backup specific repositories, multiple paths containing the + # selected namespaces will be used to find which repositories to backup + # + # Ex: ['group-a', 'group-b/project-c'] will select all projects in group-a and project-c in group-b + # This can be combined with #skip_repositories_paths + # + # @return [Array<String>] a list of paths to select which repositories to backup + attr_accessor :repositories_paths + + # In order to backup specific repositories, multiple paths containing the + # selected namespaces can be specified using #repositories_paths. To further + # refine the list, a new list of paths can be provided to be skipped among + # the previous pre-selected ones. 
+ # + # Ex: for a repository_paths containing ['group-a'] and skip_repository_paths + # containing ['group-a/project-d'], all projects in `group-a` except `project-d` + # will be backed up + # + # @return [Array<String>] a list of paths to skip backup + attr_accessor :skip_repositories_paths + + # Specify GitalyBackup to handle and perform backups server-side and stream it to object storage + # + # When this is defined, repositories will not be part of the backup archive + # @return [Boolean] whether to perform server-side backups for repositories + attr_accessor :repositories_server_side_backup + + # A custom directory to send your remote backups to + # It can be used to group different types of backups (ex: daily, weekly) + # + # @return [String|Nil] + attr_accessor :remote_directory + + # Custom compression and decompression options + # + # When compression is customized, it will ignore other related options like `:gzip_rsyncable` + # @return [CompressionOptions] custom compression and decompression commands + attr_accessor :compression_options + + # Whether to run gzip with `--rsyncable` flag + # + # This is ignored if custom :compression_options are provided + # @return [Boolean] whether to use `--rsyncable` flag with gzip + attr_accessor :gzip_rsyncable + + # rubocop:disable Metrics/ParameterLists -- This is a data object with all possible CMD options + def initialize( + backup_id: nil, previous_backup: nil, incremental: false, force: false, + skippable_tasks: SkippableTasks.new, skippable_operations: SkippableOperations.new, + max_parallelism: nil, max_storage_parallelism: nil, + repository_storages: [], repository_paths: [], skip_repository_paths: [], + repositories_server_side_backup: false, remote_directory: nil, + compression_options: CompressionOptions.new, gzip_rsyncable: false) + @backup_id = backup_id + @previous_backup = previous_backup + @incremental = incremental + @force = force + @skippable_tasks = skippable_tasks + @skippable_operations = 
skippable_operations + @max_parallelism = max_parallelism + @max_storage_parallelism = max_storage_parallelism + @remote_directory = remote_directory + @repositories_server_side_backup = repositories_server_side_backup + @repositories_storages = repository_storages + @repositories_paths = repository_paths + @skip_repositories_paths = skip_repository_paths + @compression_options = compression_options + @gzip_rsyncable = gzip_rsyncable + end + # rubocop:enable Metrics/ParameterLists + + # rubocop:disable Metrics/CyclomaticComplexity, Metrics/AbcSize -- TODO: Complexity will be solved in the Unified Backup implementation (https://gitlab.com/groups/gitlab-org/-/epics/11635) + # Extract supported options from defined ENV variables + def extract_from_env! + # We've used lowercase `force` as the key while ENV normally is defined using UPPERCASE letters + # This provides a fallback when the user defines using expected standards, while not breaking compatibility + force_value = ENV.key?('FORCE') ? 
ENV['FORCE'] : ENV['force'] + + self.backup_id = ENV['BACKUP'] + self.previous_backup = ENV['PREVIOUS_BACKUP'] + self.incremental = Gitlab::Utils.to_boolean(ENV['INCREMENTAL'], default: incremental) + self.force = Gitlab::Utils.to_boolean(force_value, default: force) + self.max_parallelism = ENV['GITLAB_BACKUP_MAX_CONCURRENCY']&.to_i + self.max_storage_parallelism = ENV['GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY']&.to_i + self.remote_directory = ENV['DIRECTORY'] + self.repositories_server_side_backup = Gitlab::Utils.to_boolean(ENV['REPOSITORIES_SERVER_SIDE'], + default: repositories_server_side_backup) + self.repositories_storages = ENV.fetch('REPOSITORIES_STORAGES', '').split(',').uniq + self.repositories_paths = ENV.fetch('REPOSITORIES_PATHS', '').split(',').uniq + self.skip_repositories_paths = ENV.fetch('SKIP_REPOSITORIES_PATHS', '').split(',').uniq + compression_options.compression_cmd = ENV['COMPRESS_CMD'] + compression_options.decompression_cmd = ENV['DECOMPRESS_CMD'] + self.gzip_rsyncable = Gitlab::Utils.to_boolean(ENV['GZIP_RSYNCABLE'], default: gzip_rsyncable) + + extract_skippables!(ENV['SKIP']) if ENV['SKIP'].present? + end + # rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize + + def update_from_backup_information!(backup_information) + self.repositories_storages += backup_information[:repositories_storages]&.split(',') || [] + self.repositories_storages.uniq! + self.repositories_storages.compact! + + self.repositories_paths += backup_information[:repositories_paths]&.split(',') || [] + self.repositories_paths.uniq! + self.repositories_paths.compact! + + self.skip_repositories_paths += backup_information[:skip_repositories_paths]&.split(',') || [] + self.skip_repositories_paths.uniq! + self.skip_repositories_paths.compact! 
+ + extract_skippables!(backup_information[:skipped]) if backup_information[:skipped] + end + + # rubocop:disable Metrics/CyclomaticComplexity -- TODO: Complexity will be solved in the Unified Backup implementation (https://gitlab.com/groups/gitlab-org/-/epics/11635) + # Return a String with a list of skippables, separated by commas + # + # @return [String] a list of skippables + def serialize_skippables + list = [] + list << 'tar' if skippable_operations.archive + list << 'remote' if skippable_operations.remote_storage + list << 'db' if skippable_tasks.db + list << 'uploads' if skippable_tasks.uploads + list << 'builds' if skippable_tasks.builds + list << 'artifacts' if skippable_tasks.artifacts + list << 'lfs' if skippable_tasks.lfs + list << 'terraform_state' if skippable_tasks.terraform_state + list << 'registry' if skippable_tasks.registry + list << 'pages' if skippable_tasks.pages + list << 'repositories' if skippable_tasks.repositories + list << 'packages' if skippable_tasks.packages + list << 'ci_secure_files' if skippable_tasks.ci_secure_files + list.join(',') + end + # rubocop:enable Metrics/CyclomaticComplexity + + # Extract skippables from provided data field + # Current callers will provide either ENV['SKIP'] or backup_information[:skipped] content + # + # The first time the method is executed it will setup `true` or `false` to each field + # subsequent executions will preserve `true` values and evaluate again only when previously set to `false` + # + # @param [String] field contains a list separated by comma without surrounding spaces + def extract_skippables!(field) + list = field.split(',').uniq + + extract_skippable_operations!(list) + extract_skippable_tasks(list) + end + + private + + def extract_skippable_operations!(list) + skippable_operations.archive ||= list.include?('tar') # SKIP=tar + skippable_operations.remote_storage ||= list.include?('remote') # SKIP=remote + end + + def extract_skippable_tasks(list) + skippable_tasks.db ||= 
list.include?('db') # SKIP=db + skippable_tasks.uploads ||= list.include?('uploads') # SKIP=uploads + skippable_tasks.builds ||= list.include?('builds') # SKIP=builds + skippable_tasks.artifacts ||= list.include?('artifacts') # SKIP=artifacts + skippable_tasks.lfs ||= list.include?('lfs') # SKIP=lfs + skippable_tasks.terraform_state ||= list.include?('terraform_state') # SKIP=terraform_state + skippable_tasks.registry ||= list.include?('registry') # SKIP=registry + skippable_tasks.pages ||= list.include?('pages') # SKIP=pages + skippable_tasks.repositories ||= list.include?('repositories') # SKIP=repositories + skippable_tasks.packages ||= list.include?('packages') # SKIP=packages + skippable_tasks.ci_secure_files ||= list.include?('ci_secure_files') # SKIP=ci_secure_files + end + end +end diff --git a/lib/backup/remote_storage.rb b/lib/backup/remote_storage.rb new file mode 100644 index 0000000000000000000000000000000000000000..bf62d5d9a7b8b2a168338e3d0809e29529b3913a --- /dev/null +++ b/lib/backup/remote_storage.rb @@ -0,0 +1,156 @@ +# frozen_string_literal: true + +module Backup + class RemoteStorage + attr_reader :progress, :options, :backup_information + + def initialize(progress:, options:) + @progress = progress + @options = options + end + + def upload(backup_information:) + @backup_information = backup_information + connection_settings = Gitlab.config.backup.upload.connection + + if connection_settings.blank? || + options.skippable_operations.remote_storage || + options.skippable_operations.archive + puts_time "Uploading backup archive to remote storage #{remote_directory} ... ".color(:blue) + + "[SKIPPED]".color(:cyan) + return + end + + puts_time "Uploading backup archive to remote storage #{remote_directory} ... 
".color(:blue) + + directory = connect_to_remote_directory + upload = directory.files.create(create_attributes) + + if upload + if upload.respond_to?(:encryption) && upload.encryption + puts_time "Uploading backup archive to remote storage #{remote_directory} ... ".color(:blue) + + "done (encrypted with #{upload.encryption})".color(:green) + else + puts_time "Uploading backup archive to remote storage #{remote_directory} ... ".color(:blue) + + "done".color(:green) + end + else + puts_time "Uploading backup to #{remote_directory} failed".color(:red) + raise Backup::Error, 'Backup failed' + end + end + + def remote_target + if options.remote_directory + File.join(options.remote_directory, tar_file) + else + tar_file + end + end + + def create_attributes + attrs = { + key: remote_target, + body: File.open(File.join(backup_path, tar_file)), + multipart_chunk_size: Gitlab.config.backup.upload.multipart_chunk_size, + storage_class: Gitlab.config.backup.upload.storage_class + }.merge(encryption_attributes) + + # Google bucket-only policies prevent setting an ACL. In any case, by default, + # all objects are set to the default ACL, which is project-private: + # https://cloud.google.com/storage/docs/json_api/v1/defaultObjectAccessControls + attrs[:public] = false unless google_provider? + + attrs + end + + def encryption_attributes + return object_storage_config.fog_attributes if object_storage_config.aws_server_side_encryption_enabled? + + # Use customer-managed keys. Also, this preserves backward-compatibility + # for existing use of Amazon S3-Managed Keys (SSE-S3) that don't set + # `backup.upload.storage_options.server_side_encryption` to `'AES256'`. + # + # AWS supports three different modes for encrypting S3 data: + # + # 1. Server-Side Encryption with Amazon S3-Managed Keys (SSE-S3) + # 2. Server-Side Encryption with Customer Master Keys (CMKs) Stored in AWS + # Key Management Service (SSE-KMS) + # 3. 
Server-Side Encryption with Customer-Provided Keys (SSE-C) + # + # Previously, SSE-S3 and SSE-C were supported via the + # `backup.upload.encryption` and `backup.upload.encryption_key` + # configuration options. + # + # SSE-KMS was previously not supported in backups because there was no way + # to specify which customer-managed key to use. However, we did support + # SSE-KMS with consolidated object storage enabled for other CI artifacts, + # attachments, LFS, etc. Note that SSE-C is NOT supported here. + # + # In consolidated object storage, the `storage_options` Hash provides the + # `server_side_encryption` and `server_side_encryption_kms_key_id` + # parameters that allow admins to configure SSE-KMS. We reuse this + # configuration in backups to support SSE-KMS. + { + encryption_key: Gitlab.config.backup.upload.encryption_key, + encryption: Gitlab.config.backup.upload.encryption + } + end + + def google_provider? + Gitlab.config.backup.upload.connection&.provider&.downcase == 'google' + end + + private + + def connect_to_remote_directory + connection = ::Fog::Storage.new(object_storage_config.credentials) + + # We only attempt to create the directory for local backups. For AWS + # and other cloud providers, we cannot guarantee the user will have + # permission to create the bucket. + if connection.service == ::Fog::Storage::Local + connection.directories.create(key: remote_directory) + else + connection.directories.new(key: remote_directory) + end + end + + # The remote 'directory' to store your backups. For S3, this would be the bucket name. 
+ # @example Configuration setting the S3 bucket name + # remote_directory: 'my.s3.bucket' + def remote_directory + Gitlab.config.backup.upload.remote_directory + end + + def object_storage_config + @object_storage_config ||= ObjectStorage::Config.new(Gitlab.config.backup.upload) + end + + # TODO: This is a temporary workaround for bad design in Backup::Manager + # Output related code would be moved to a new location + def puts_time(msg) + progress.puts "#{Time.current} -- #{msg}" + Gitlab::BackupLogger.info(message: Rainbow.uncolor(msg)) + end + + # TODO: This is a temporary workaround for bad design in Backup::Manager + def tar_file + @tar_file ||= "#{backup_id}#{Backup::Manager::FILE_NAME_SUFFIX}" + end + + # TODO: This is a temporary workaround for bad design in Backup::Manager + def backup_id + if options.backup_id.present? + File.basename(options.backup_id) + else + "#{backup_information[:backup_created_at].strftime('%s_%Y_%m_%d_')}#{backup_information[:gitlab_version]}" + end + end + + # TODO: This is a temporary workaround for bad design in Backup::Manager + def backup_path + Gitlab.config.backup.path + end + end +end diff --git a/lib/backup/repositories.rb b/lib/backup/repositories.rb index c3154ccfbb5816d179ab1c023460900c41362591..6d8d5272bfae0b8e9a2d96c4c85f739af58430d7 100644 --- a/lib/backup/repositories.rb +++ b/lib/backup/repositories.rb @@ -12,8 +12,8 @@ class Repositories < Task # @param [Array<String>] :storages Filter by specified storage names. Empty means all storages. # @param [Array<String>] :paths Filter by specified project paths. Empty means all projects, groups, and snippets. # @param [Array<String>] :skip_paths Skip specified project paths. Empty means all projects, groups, and snippets. 
- def initialize(progress, strategy:, storages: [], paths: [], skip_paths: []) - super(progress) + def initialize(progress, strategy:, options:, storages: [], paths: [], skip_paths: []) + super(progress, options: options) @strategy = strategy @storages = storages diff --git a/lib/backup/task.rb b/lib/backup/task.rb index 65059f3a3cba71b7929e5dc0f282a357ac3aae16..b5f6c9e63309fb3a4396943e00bb4c33cf8b735b 100644 --- a/lib/backup/task.rb +++ b/lib/backup/task.rb @@ -2,8 +2,14 @@ module Backup class Task - def initialize(progress) + # Backup creation and restore option flags + # + # @return [Backup::Options] + attr_reader :options + + def initialize(progress, options:) @progress = progress + @options = options end # dump task backup to `path` diff --git a/spec/factories/gitlab/backup/options.rb b/spec/factories/gitlab/backup/options.rb new file mode 100644 index 0000000000000000000000000000000000000000..7cd7c5795ab03046bd1779aee0d79bfb537519f7 --- /dev/null +++ b/spec/factories/gitlab/backup/options.rb @@ -0,0 +1,131 @@ +# frozen_string_literal: true + +FactoryBot.define do + factory :backup_options, class: 'Backup::Options' do + skip_create + + incremental { [false, true].sample } + force { [false, true].sample } + skippable_tasks { attributes_for(:backup_skippable_tasks) } + skippable_operations { attributes_for(:backup_skippable_operations) } + max_parallelism { rand(1..8) } + max_storage_parallelism { rand(1..8) } + repositories_server_side_backup { [false, true].sample } + compression_options { attributes_for(:backup_compression_options) } + gzip_rsyncable { [false, true].sample } + + trait :backup_id do + backup_id { '11493107454_2018_04_25_10.6.4-ce' } + end + + trait :previous_backup do + previous_backup { '11493107454_2018_04_25_10.6.4-ce' } + end + + trait :repositories_storages do + repositories_storages { %w[storage1 storage2] } + end + + trait :repositories_paths do + repositories_paths { %w[group-a group-b/project-c] } + end + + trait 
:skip_repositories_paths do + skip_repositories_paths { %w[group-a/project-d group-a/project-e] } + end + + trait :remote_directory do + remote_directory { %w[daily weekly monthly quarterly upgrade].sample } + end + + trait :all do + backup_id + previous_backup + repositories_storages + repositories_paths + skip_repositories_paths + remote_directory + skippable_tasks { attributes_for(:backup_skippable_tasks, :skip_all) } + skippable_operations { attributes_for(:backup_skippable_operations, :skip_all) } + compression_options { attributes_for(:backup_compression_options, :all) } + end + end + + factory :backup_skippable_tasks, class: 'Backup::Options::SkippableTasks' do + skip_create + + db { [false, true].sample } + uploads { [false, true].sample } + builds { [false, true].sample } + artifacts { [false, true].sample } + lfs { [false, true].sample } + terraform_state { [false, true].sample } + registry { [false, true].sample } + pages { [false, true].sample } + repositories { [false, true].sample } + packages { [false, true].sample } + ci_secure_files { [false, true].sample } + + trait :skip_all do + db { true } + uploads { true } + builds { true } + artifacts { true } + lfs { true } + terraform_state { true } + registry { true } + pages { true } + repositories { true } + packages { true } + ci_secure_files { true } + end + + trait :skip_none do + db { false } + uploads { false } + builds { false } + artifacts { false } + lfs { false } + terraform_state { false } + registry { false } + pages { false } + repositories { false } + packages { false } + ci_secure_files { false } + end + end + + factory :backup_skippable_operations, class: 'Backup::Options::SkippableOperations' do + skip_create + + archive { [false, true].sample } + remote_storage { [false, true].sample } + + trait :skip_all do + archive { true } + remote_storage { true } + end + + trait :skip_none do + archive { false } + remote_storage { false } + end + end + + factory :backup_compression_options, class: 
'Backup::Options::CompressionOptions' do + skip_create + + trait :compression_cmd do + 'pigz --compress --stdout --fast --processes=4' + end + + trait :decompression_cmd do + 'pigz --decompress --stdout"' + end + + trait :all do + compression_cmd + decompression_cmd + end + end +end diff --git a/spec/lib/backup/database_spec.rb b/spec/lib/backup/database_spec.rb index 86468689f76e1191728aac49dc46603dddc2f3ba..7e023fda830ba8c1f01653b51cf221b6f601d86d 100644 --- a/spec/lib/backup/database_spec.rb +++ b/spec/lib/backup/database_spec.rb @@ -7,6 +7,7 @@ let(:progress_output) { progress.string } let(:backup_id) { 'some_id' } let(:one_database_configured?) { base_models_for_backup.one? } + let(:backup_options) { Backup::Options.new } let(:timeout_service) do instance_double(Gitlab::Database::TransactionTimeoutSettings, restore_timeouts: nil, disable_timeouts: nil) end @@ -28,7 +29,7 @@ describe '#dump', :delete do let(:force) { true } - subject { described_class.new(progress, force: force) } + subject { described_class.new(progress, force: force, options: backup_options) } it 'creates gzipped database dumps' do Dir.mktmpdir do |dir| @@ -128,7 +129,7 @@ let(:force) { true } let(:rake_task) { instance_double(Rake::Task, invoke: true) } - subject { described_class.new(progress, force: force) } + subject { described_class.new(progress, force: force, options: backup_options) } before do allow(Rake::Task).to receive(:[]).with(any_args).and_return(rake_task) diff --git a/spec/lib/backup/files_spec.rb b/spec/lib/backup/files_spec.rb index f0fc829764aebc5f5af331b0016867c719b4b4c9..3c96628b4cf78f2f72a61a6d4c911956be252f6c 100644 --- a/spec/lib/backup/files_spec.rb +++ b/spec/lib/backup/files_spec.rb @@ -5,6 +5,7 @@ RSpec.describe Backup::Files, feature_category: :backup_restore do let(:progress) { StringIO.new } let!(:project) { create(:project) } + let(:backup_options) { Backup::Options.new } let(:status_0) { double('exit 0', success?: true, exitstatus: 0) } let(:status_1) { 
double('exit 1', success?: false, exitstatus: 1) } @@ -39,7 +40,7 @@ end describe '#restore' do - subject { described_class.new(progress, '/var/gitlab-registry') } + subject { described_class.new(progress, '/var/gitlab-registry', options: backup_options) } let(:timestamp) { Time.utc(2017, 3, 22) } @@ -125,7 +126,9 @@ end describe '#dump' do - subject { described_class.new(progress, '/var/gitlab-pages', excludes: ['@pages.tmp']) } + subject do + described_class.new(progress, '/var/gitlab-pages', excludes: ['@pages.tmp'], options: backup_options) + end before do allow(subject).to receive(:run_pipeline!).and_return([[true, true], '']) @@ -222,7 +225,9 @@ end describe '#exclude_dirs' do - subject { described_class.new(progress, '/var/gitlab-pages', excludes: ['@pages.tmp']) } + subject do + described_class.new(progress, '/var/gitlab-pages', excludes: ['@pages.tmp'], options: backup_options) + end it 'prepends a leading dot slash to tar excludes' do expect(subject.exclude_dirs(:tar)).to eq(['--exclude=lost+found', '--exclude=./@pages.tmp']) @@ -234,7 +239,9 @@ end describe '#run_pipeline!' do - subject { described_class.new(progress, '/var/gitlab-registry') } + subject do + described_class.new(progress, '/var/gitlab-registry', options: backup_options) + end it 'executes an Open3.pipeline for cmd_list' do expect(Open3).to receive(:pipeline).with(%w[whew command], %w[another cmd], any_args) @@ -268,7 +275,9 @@ end describe '#pipeline_succeeded?' do - subject { described_class.new(progress, '/var/gitlab-registry') } + subject do + described_class.new(progress, '/var/gitlab-registry', options: backup_options) + end it 'returns true if both tar and gzip succeeeded' do expect( @@ -308,7 +317,9 @@ end describe '#tar_ignore_non_success?' 
do - subject { described_class.new(progress, '/var/gitlab-registry') } + subject do + described_class.new(progress, '/var/gitlab-registry', options: backup_options) + end context 'if `tar` command exits with 1 exitstatus' do it 'returns true' do @@ -356,7 +367,9 @@ end describe '#noncritical_warning?' do - subject { described_class.new(progress, '/var/gitlab-registry') } + subject do + described_class.new(progress, '/var/gitlab-registry', options: backup_options) + end it 'returns true if given text matches noncritical warnings list' do expect( diff --git a/spec/lib/backup/manager_spec.rb b/spec/lib/backup/manager_spec.rb index 8f85cd9d8b346539e3ae3eb783145f3982348884..1aba245cab3e74d31b8b72021a772ce74b314cbd 100644 --- a/spec/lib/backup/manager_spec.rb +++ b/spec/lib/backup/manager_spec.rb @@ -26,40 +26,40 @@ let(:task) { instance_double(Backup::Task) } let(:definitions) do { - 'my_task' => Backup::Manager::TaskDefinition.new( + 'terraform_state' => Backup::Manager::TaskDefinition.new( task: task, enabled: enabled, - destination_path: 'my_task.tar.gz', - human_name: 'my task' + destination_path: 'terraform_state.tar.gz', + human_name: 'terraform state' ) } end it 'calls the named task' do expect(task).to receive(:dump) - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Dumping my task ... ') - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Dumping my task ... done') + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Dumping terraform state ... ') + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Dumping terraform state ... done') - subject.run_create_task('my_task') + subject.run_create_task('terraform_state') end describe 'disabled' do let(:enabled) { false } it 'informs the user' do - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Dumping my task ... [DISABLED]') + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Dumping terraform state ... 
[DISABLED]') - subject.run_create_task('my_task') + subject.run_create_task('terraform_state') end end describe 'skipped' do it 'informs the user' do - stub_env('SKIP', 'my_task') + stub_env('SKIP', 'terraform_state') - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Dumping my task ... [SKIPPED]') + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Dumping terraform state ... [SKIPPED]') - subject.run_create_task('my_task') + subject.run_create_task('terraform_state') end end end @@ -68,7 +68,7 @@ let(:enabled) { true } let(:pre_restore_warning) { nil } let(:post_restore_warning) { nil } - let(:definitions) { { 'my_task' => Backup::Manager::TaskDefinition.new(task: task, enabled: enabled, human_name: 'my task', destination_path: 'my_task.tar.gz') } } + let(:definitions) { { 'terraform_state' => Backup::Manager::TaskDefinition.new(task: task, enabled: enabled, human_name: 'terraform state', destination_path: 'terraform_state.tar.gz') } } let(:backup_information) { { backup_created_at: Time.zone.parse('2019-01-01'), gitlab_version: '12.3' } } let(:task) do instance_double(Backup::Task, @@ -77,27 +77,26 @@ end before do - allow(YAML).to receive(:safe_load_file).with( - File.join(Gitlab.config.backup.path, 'backup_information.yml'), - permitted_classes: described_class::YAML_PERMITTED_CLASSES) - .and_return(backup_information) + allow_next_instance_of(Backup::Metadata) do |metadata| + allow(metadata).to receive(:load_from_file).and_return(backup_information) + end end it 'calls the named task' do expect(task).to receive(:restore) - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring my task ... ').ordered - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring my task ... done').ordered + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring terraform state ... ').ordered + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring terraform state ... 
done').ordered - subject.run_restore_task('my_task') + subject.run_restore_task('terraform_state') end describe 'disabled' do let(:enabled) { false } it 'informs the user' do - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring my task ... [DISABLED]').ordered + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring terraform state ... [DISABLED]').ordered - subject.run_restore_task('my_task') + subject.run_restore_task('terraform_state') end end @@ -105,23 +104,23 @@ let(:pre_restore_warning) { 'Watch out!' } it 'displays and waits for the user' do - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring my task ... ').ordered + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring terraform state ... ').ordered expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Watch out!').ordered - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring my task ... done').ordered + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring terraform state ... done').ordered expect(Gitlab::TaskHelpers).to receive(:ask_to_continue) expect(task).to receive(:restore) - subject.run_restore_task('my_task') + subject.run_restore_task('terraform_state') end it 'does not continue when the user quits' do - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring my task ... ').ordered + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring terraform state ... ').ordered expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Watch out!').ordered expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Quitting...').ordered expect(Gitlab::TaskHelpers).to receive(:ask_to_continue).and_raise(Gitlab::TaskAbortedByUserError) expect do - subject.run_restore_task('my_task') + subject.run_restore_task('terraform_state') end.to raise_error(SystemExit) end end @@ -130,25 +129,25 @@ let(:post_restore_warning) { 'Watch out!' 
} it 'displays and waits for the user' do - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring my task ... ').ordered - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring my task ... done').ordered + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring terraform state ... ').ordered + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring terraform state ... done').ordered expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Watch out!').ordered expect(Gitlab::TaskHelpers).to receive(:ask_to_continue) expect(task).to receive(:restore) - subject.run_restore_task('my_task') + subject.run_restore_task('terraform_state') end it 'does not continue when the user quits' do - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring my task ... ').ordered - expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring my task ... done').ordered + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring terraform state ... ').ordered + expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Restoring terraform state ... 
done').ordered expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Watch out!').ordered expect(Gitlab::BackupLogger).to receive(:info).with(message: 'Quitting...').ordered expect(task).to receive(:restore) expect(Gitlab::TaskHelpers).to receive(:ask_to_continue).and_raise(Gitlab::TaskAbortedByUserError) expect do - subject.run_restore_task('my_task') + subject.run_restore_task('terraform_state') end.to raise_error(SystemExit) end end @@ -156,7 +155,7 @@ describe '#create' do let(:incremental_env) { 'false' } - let(:expected_backup_contents) { %w[backup_information.yml task1.tar.gz task2.tar.gz] } + let(:expected_backup_contents) { %w[backup_information.yml lfs.tar.gz pages.tar.gz] } let(:backup_time) { Time.zone.parse('2019-1-1') } let(:backup_id) { "1546300800_2019_01_01_#{Gitlab::VERSION}" } let(:full_backup_id) { backup_id } @@ -168,8 +167,8 @@ let(:task2) { instance_double(Backup::Task) } let(:definitions) do { - 'task1' => Backup::Manager::TaskDefinition.new(task: task1, human_name: 'task 1', destination_path: 'task1.tar.gz'), - 'task2' => Backup::Manager::TaskDefinition.new(task: task2, human_name: 'task 2', destination_path: 'task2.tar.gz') + 'lfs' => Backup::Manager::TaskDefinition.new(task: task1, human_name: 'lfs objects', destination_path: 'lfs.tar.gz'), + 'pages' => Backup::Manager::TaskDefinition.new(task: task2, human_name: 'pages', destination_path: 'pages.tar.gz') } end @@ -179,8 +178,8 @@ allow(Gitlab::BackupLogger).to receive(:info) allow(Kernel).to receive(:system).and_return(true) - allow(task1).to receive(:dump).with(File.join(Gitlab.config.backup.path, 'task1.tar.gz'), backup_id) - allow(task2).to receive(:dump).with(File.join(Gitlab.config.backup.path, 'task2.tar.gz'), backup_id) + allow(task1).to receive(:dump).with(File.join(Gitlab.config.backup.path, 'lfs.tar.gz'), backup_id) + allow(task2).to receive(:dump).with(File.join(Gitlab.config.backup.path, 'pages.tar.gz'), backup_id) end it 'creates a backup tar' do @@ -223,10 +222,10 
@@ end context 'when SKIP env is set' do - let(:expected_backup_contents) { %w[backup_information.yml task1.tar.gz] } + let(:expected_backup_contents) { %w[backup_information.yml lfs.tar.gz] } before do - stub_env('SKIP', 'task2') + stub_env('SKIP', 'pages') end it 'executes tar' do @@ -237,16 +236,16 @@ end context 'when the destination is optional' do - let(:expected_backup_contents) { %w[backup_information.yml task1.tar.gz] } + let(:expected_backup_contents) { %w[backup_information.yml lfs.tar.gz] } let(:definitions) do { - 'task1' => Backup::Manager::TaskDefinition.new(task: task1, destination_path: 'task1.tar.gz'), - 'task2' => Backup::Manager::TaskDefinition.new(task: task2, destination_path: 'task2.tar.gz', destination_optional: true) + 'lfs' => Backup::Manager::TaskDefinition.new(task: task1, destination_path: 'lfs.tar.gz'), + 'pages' => Backup::Manager::TaskDefinition.new(task: task2, destination_path: 'pages.tar.gz', destination_optional: true) } end it 'executes tar' do - expect(File).to receive(:exist?).with(File.join(Gitlab.config.backup.path, 'task2.tar.gz')).and_return(false) + expect(File).to receive(:exist?).with(File.join(Gitlab.config.backup.path, 'pages.tar.gz')).and_return(false) subject.create # rubocop:disable Rails/SaveBang @@ -411,7 +410,10 @@ before do allow(Gitlab::BackupLogger).to receive(:info) - allow(subject).to receive(:tar_file).and_return(backup_filename) + allow_next_instance_of(described_class) do |manager| + allow(manager).to receive(:tar_file).and_return(backup_filename) + allow(manager.remote_storage).to receive(:tar_file).and_return(backup_filename) + end stub_backup_setting( upload: { @@ -605,16 +607,14 @@ end expect(Kernel).not_to have_received(:system).with(*pack_tar_cmdline) - expect(YAML.safe_load_file( - File.join(Gitlab.config.backup.path, 'backup_information.yml'), - permitted_classes: described_class::YAML_PERMITTED_CLASSES)).to include( - backup_created_at: backup_time.localtime, - db_version: be_a(String), - 
gitlab_version: Gitlab::VERSION, - installation_type: Gitlab::INSTALLATION_TYPE, - skipped: 'tar', - tar_version: be_a(String) - ) + expect(subject.send(:backup_information).to_h).to include( + backup_created_at: backup_time.localtime, + db_version: be_a(String), + gitlab_version: Gitlab::VERSION, + installation_type: Gitlab::INSTALLATION_TYPE, + skipped: 'tar', + tar_version: be_a(String) + ) expect(FileUtils).to have_received(:rm_rf).with(File.join(Gitlab.config.backup.path, 'tmp')) end end @@ -633,11 +633,9 @@ end before do - allow(YAML).to receive(:safe_load_file).and_call_original - allow(YAML).to receive(:safe_load_file).with( - File.join(Gitlab.config.backup.path, 'backup_information.yml'), - permitted_classes: described_class::YAML_PERMITTED_CLASSES) - .and_return(backup_information) + allow_next_instance_of(Backup::Metadata) do |metadata| + allow(metadata).to receive(:load_from_file).and_return(backup_information) + end end context 'when there are no backup files in the directory' do @@ -882,7 +880,7 @@ ] ) allow(File).to receive(:exist?).with(File.join(Gitlab.config.backup.path, 'backup_information.yml')).and_return(true) - stub_env('SKIP', 'something') + stub_env('SKIP', 'pages') end after do @@ -898,13 +896,11 @@ .with(a_string_matching('Non tarred backup found ')) expect(progress).to have_received(:puts) .with(a_string_matching("Backup #{backup_id} is done")) - expect(YAML.safe_load_file(File.join(Gitlab.config.backup.path, 'backup_information.yml'), - permitted_classes: described_class::YAML_PERMITTED_CLASSES)).to include( - backup_created_at: backup_time, - full_backup_id: full_backup_id, - gitlab_version: Gitlab::VERSION, - skipped: 'something,tar' - ) + expect(subject.send(:backup_information).to_h).to include( + backup_created_at: backup_time, + full_backup_id: full_backup_id, + gitlab_version: Gitlab::VERSION, + skipped: 'tar,pages') end context 'on version mismatch' do @@ -930,8 +926,8 @@ let(:task2) { instance_double(Backup::Task, 
pre_restore_warning: nil, post_restore_warning: nil) } let(:definitions) do { - 'task1' => Backup::Manager::TaskDefinition.new(task: task1, human_name: 'task 1', destination_path: 'task1.tar.gz'), - 'task2' => Backup::Manager::TaskDefinition.new(task: task2, human_name: 'task 2', destination_path: 'task2.tar.gz') + 'lfs' => Backup::Manager::TaskDefinition.new(task: task1, human_name: 'lfs content', destination_path: 'lfs.tar.gz'), + 'pages' => Backup::Manager::TaskDefinition.new(task: task2, human_name: 'pages', destination_path: 'pages.tar.gz') } end @@ -950,11 +946,11 @@ Rake.application.rake_require 'tasks/cache' allow(Gitlab::BackupLogger).to receive(:info) - allow(task1).to receive(:restore).with(File.join(Gitlab.config.backup.path, 'task1.tar.gz'), backup_id) - allow(task2).to receive(:restore).with(File.join(Gitlab.config.backup.path, 'task2.tar.gz'), backup_id) - allow(YAML).to receive(:safe_load_file).with(File.join(Gitlab.config.backup.path, 'backup_information.yml'), - permitted_classes: described_class::YAML_PERMITTED_CLASSES) - .and_return(backup_information) + allow(task1).to receive(:restore).with(File.join(Gitlab.config.backup.path, 'lfs.tar.gz'), backup_id) + allow(task2).to receive(:restore).with(File.join(Gitlab.config.backup.path, 'pages.tar.gz'), backup_id) + allow_next_instance_of(Backup::Metadata) do |metadata| + allow(metadata).to receive(:load_from_file).and_return(backup_information) + end allow(Rake::Task['gitlab:shell:setup']).to receive(:invoke) allow(Rake::Task['cache:clear']).to receive(:invoke) end diff --git a/spec/lib/backup/options_spec.rb b/spec/lib/backup/options_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..970eea134ddab6e74b2c3ffe1c96b9c585cc12bc --- /dev/null +++ b/spec/lib/backup/options_spec.rb @@ -0,0 +1,275 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Backup::Options, feature_category: :backup_restore do + include StubENV + + subject(:options) { 
described_class.new } + + context 'with accessors' do + describe 'backup_id' do + it { is_expected.to respond_to :backup_id } + it { is_expected.to respond_to :backup_id= } + end + + describe 'previous_backup' do + it { is_expected.to respond_to :previous_backup } + it { is_expected.to respond_to :previous_backup= } + end + + describe 'incremental' do + it { is_expected.to respond_to :incremental } + it { is_expected.to respond_to :incremental= } + end + + describe 'force' do + it { is_expected.to respond_to :force } + it { is_expected.to respond_to :force= } + end + + describe 'skippable_tasks' do + it { is_expected.to respond_to :skippable_tasks } + it { is_expected.to respond_to :skippable_tasks= } + end + + describe 'skippable_operations' do + it { is_expected.to respond_to :skippable_operations } + it { is_expected.to respond_to :skippable_operations= } + end + + describe 'max_parallelism' do + it { is_expected.to respond_to :max_parallelism } + it { is_expected.to respond_to :max_parallelism= } + end + + describe 'max_storage_parallelism' do + it { is_expected.to respond_to :max_storage_parallelism } + it { is_expected.to respond_to :max_storage_parallelism= } + end + + describe 'repositories_storages' do + it { is_expected.to respond_to :repositories_storages } + it { is_expected.to respond_to :repositories_storages= } + end + + describe 'repositories_paths' do + it { is_expected.to respond_to :repositories_paths } + it { is_expected.to respond_to :repositories_paths= } + end + + describe 'skip_repositories_paths' do + it { is_expected.to respond_to :skip_repositories_paths } + it { is_expected.to respond_to :skip_repositories_paths= } + end + + describe 'repositories_server_side_backup' do + it { is_expected.to respond_to :repositories_server_side_backup } + it { is_expected.to respond_to :repositories_server_side_backup= } + end + + describe 'remote_directory' do + it { is_expected.to respond_to :remote_directory } + it { is_expected.to respond_to 
:remote_directory= } + end + + describe 'compression_options' do + it { is_expected.to respond_to :compression_options } + it { is_expected.to respond_to :compression_options= } + end + + describe 'gzip_rsyncable' do + it { is_expected.to respond_to :gzip_rsyncable } + it { is_expected.to respond_to :gzip_rsyncable= } + end + end + + describe '#initialize' do + it 'can be initialized without providing any parameter' do + expect { described_class.new }.not_to raise_exception + end + + it 'can be initialized with all valid parameters' do + expect { FactoryBot.build(:backup_options, :all) }.not_to raise_exception + end + end + + describe '#extract_from_env!' do + it 'extracts BACKUP env' do + env_value = '11493107454_2018_04_25_10.6.4-ce' + stub_env('BACKUP' => env_value) + + expect { options.extract_from_env! }.to change { options.backup_id }.to(env_value) + end + + it 'extracts PREVIOUS_BACKUP env' do + env_value = '11493107454_2018_04_25_10.6.4-ce' + stub_env('PREVIOUS_BACKUP' => env_value) + + expect { options.extract_from_env! }.to change { options.previous_backup }.to(env_value) + end + + it 'extracts INCREMENTAL env' do + stub_env('INCREMENTAL' => 'yes') + + expect { options.extract_from_env! }.to change { options.incremental }.to(true) + end + + it 'extracts FORCE env' do + stub_env('FORCE' => 'yes') + + expect { options.extract_from_env! }.to change { options.force }.to(true) + end + + it 'extracts GITLAB_BACKUP_MAX_CONCURRENCY env' do + stub_env('GITLAB_BACKUP_MAX_CONCURRENCY' => '8') + + expect { options.extract_from_env! }.to change { options.max_parallelism }.to(8) + end + + it 'extracts GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY env' do + stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY' => '3') + + expect { options.extract_from_env! }.to change { options.max_storage_parallelism }.to(3) + end + + it 'extracts DIRECTORY env' do + directory = 'daily' + stub_env('DIRECTORY' => directory) + + expect { options.extract_from_env! 
}.to change { options.remote_directory }.to(directory) + end + + it 'extracts REPOSITORIES_SERVER_SIDE env' do + stub_env('REPOSITORIES_SERVER_SIDE' => 'yes') + + expect { options.extract_from_env! }.to change { options.repositories_server_side_backup }.to(true) + end + + it 'extracts REPOSITORIES_STORAGES env' do + stub_env('REPOSITORIES_STORAGES' => 'storage1,storage2') + + expect { options.extract_from_env! }.to change { options.repositories_storages }.to(%w[storage1 storage2]) + end + + it 'extracts REPOSITORIES_PATHS env' do + stub_env('REPOSITORIES_PATHS' => 'group-a,group-b/project-c') + + expect { options.extract_from_env! }.to change { options.repositories_paths }.to(%w[group-a group-b/project-c]) + end + + it 'extracts SKIP_REPOSITORIES_PATHS env' do + stub_env('SKIP_REPOSITORIES_PATHS' => 'group-a/project-d,group-a/project-e') + + expect { options.extract_from_env! }.to change { + options.skip_repositories_paths + }.to(%w[group-a/project-d group-a/project-e]) + end + + it 'extracts COMPRESS_CMD env' do + cmd = 'pigz --compress --stdout --fast --processes=4' + stub_env('COMPRESS_CMD' => cmd) + + expect { options.extract_from_env! }.to change { options.compression_options.compression_cmd }.to(cmd) + end + + it 'extracts DECOMPRESS_CMD env' do + cmd = 'pigz --decompress --stdout"' + stub_env('DECOMPRESS_CMD' => cmd) + + expect { options.extract_from_env! }.to change { options.compression_options.decompression_cmd }.to(cmd) + end + + it 'extracts GZIP_RSYNCABLE env' do + stub_env('GZIP_RSYNCABLE' => 'yes') + + expect { options.extract_from_env! }.to change { options.gzip_rsyncable }.to(true) + end + + it 'delegates to extract_skippables! when SKIP env is present' do + stub_env('SKIP' => 'db') + expect(options).to receive(:extract_skippables!) + + options.extract_from_env! + end + + it 'does not call extract_skippables! when SKIP env is missing' do + stub_env('SKIP' => nil) + expect(options).not_to receive(:extract_skippables!) + + options.extract_from_env! 
+    end
+  end
+
+  # Feeds one comma-separated list containing every known token and checks that
+  # each token flips its matching flag to true.
+  describe '#extract_skippables!' do
+    let(:skippable_field) do
+      'tar,remote,db,uploads,builds,artifacts,lfs,terraform_state,registry,pages,repositories,packages,ci_secure_files'
+    end
+
+    context 'for skippable operations' do
+      # input token => attribute on skippable_operations
+      { 'tar' => :archive, 'remote' => :remote_storage }.each do |input, flag|
+        it "parses skippable #{input} input" do
+          expect { options.extract_skippables!(skippable_field) }
+            .to change { options.skippable_operations.public_send(flag) }.to(true)
+        end
+      end
+    end
+
+    context 'for skippable tasks' do
+      # For tasks the input token and the attribute on skippable_tasks share a name.
+      task_names = %i[
+        db uploads builds artifacts lfs terraform_state registry pages repositories packages ci_secure_files
+      ]
+
+      task_names.each do |task_name|
+        it "parses skippable #{task_name} input" do
+          expect { options.extract_skippables!(skippable_field) }
+            .to change { options.skippable_tasks.public_send(task_name) }.to(true)
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lib/backup/repositories_spec.rb b/spec/lib/backup/repositories_spec.rb
index 679be62393e6c6ece8e2bee3ad07ea25e1be9a86..e63d321495e74567fe62d16a493f7873498e2e60 100644
--- a/spec/lib/backup/repositories_spec.rb
+++ b/spec/lib/backup/repositories_spec.rb
@@ -10,11 +10,13 @@
   let(:skip_paths) { [] }
   let(:destination) { 'repositories' }
   let(:backup_id) { 'backup_id' }
+  let(:backup_options) { Backup::Options.new }
 
   subject do
     described_class.new(
       progress,
       strategy: strategy,
+      options: backup_options,
       storages: storages,
       paths: paths,
       skip_paths: skip_paths
diff --git a/spec/lib/backup/task_spec.rb b/spec/lib/backup/task_spec.rb
index 370d9e4a64f491cb9093ec5a44b568f078f7ab6e..5ded16cd52b8a7eb2767c471aed6ffee64393afc 100644
--- a/spec/lib/backup/task_spec.rb
+++ b/spec/lib/backup/task_spec.rb
@@ -1,11 +1,12 @@
 # frozen_string_literal: true
 
-require 'fast_spec_helper'
+require 'spec_helper'
 
-RSpec.describe Backup::Task do
+RSpec.describe Backup::Task, feature_category: :backup_restore do
   let(:progress) { StringIO.new }
+  let(:backup_options) { build(:backup_options) }
 
-  subject { described_class.new(progress) }
+  subject { described_class.new(progress, options: backup_options) }
 
   describe '#dump' do
     it 'must be implemented by the subclass' do
diff --git a/spec/tasks/gitlab/backup_rake_spec.rb b/spec/tasks/gitlab/backup_rake_spec.rb
index 4f27ba57bcbda3345b287810554674bb8e41135a..42699239d870c10b46e42970ff3e42608f0186cf 100644
--- a/spec/tasks/gitlab/backup_rake_spec.rb
+++ 
b/spec/tasks/gitlab/backup_rake_spec.rb @@ -565,7 +565,7 @@ def reenable_backup_sub_tasks stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2) expect(::Backup::Repositories).to receive(:new) - .with(anything, strategy: anything, storages: [], paths: [], skip_paths: []) + .with(anything, strategy: anything, options: anything, storages: [], paths: [], skip_paths: []) .and_call_original expect(::Backup::GitalyBackup).to receive(:new).with( anything,