Skip to content
代码片段 群组 项目
未验证 提交 4aa8f00e 编辑于 作者: Aakriti Gupta's avatar Aakriti Gupta 提交者: GitLab
浏览文件

Migrate and refactor files and repo targets to unified backups

上级 b279019c
No related branches found
No related tags found
无相关合并请求
显示
541 个添加，51 个删除
......@@ -20,6 +20,7 @@ module Cli
autoload :GitlabConfig, 'gitlab/backup/cli/gitlab_config'
autoload :Metadata, 'gitlab/backup/cli/metadata'
autoload :Output, 'gitlab/backup/cli/output'
autoload :RepoType, 'gitlab/backup/cli/repo_type'
autoload :RestoreExecutor, 'gitlab/backup/cli/restore_executor'
autoload :Runner, 'gitlab/backup/cli/runner'
autoload :Shell, 'gitlab/backup/cli/shell'
......
......@@ -51,20 +51,17 @@ def build_metadata
end
def execute_all_tasks
# TODO: when we migrate targets to the new codebase, recreate options to have only what we need here
# https://gitlab.com/gitlab-org/gitlab/-/issues/454906
options = ::Backup::Options.new(
remote_directory: backup_bucket,
container_registry_bucket: registry_bucket,
service_account_file: service_account_file
)
tasks = []
Gitlab::Backup::Cli::Tasks.build_each(context: context, options: options) do |task|
Gitlab::Backup::Cli::Tasks.build_each(context: context) do |task|
# This is a temporary hack while we move away from options and use config instead
# This hack will be removed as part of https://gitlab.com/gitlab-org/gitlab/-/issues/498455
task.set_registry_bucket(registry_bucket) if task.is_a?(Gitlab::Backup::Cli::Tasks::Registry)
Gitlab::Backup::Cli::Output.info("Executing Backup of #{task.human_name}...")
duration = measure_duration do
task.backup!(workdir, metadata.backup_id)
task.backup!(workdir)
tasks << task
end
......
......@@ -104,7 +104,7 @@ def upload_path
end
def config(object_type)
Gitlab.config[object_type]
gitlab_config[object_type]
end
def env
......@@ -112,6 +112,18 @@ def env
ENV["RAILS_ENV"].presence || ENV["RACK_ENV"].presence || "development")
end
def config_repositories_storages
gitlab_config.dig(env, 'repositories', 'storages')
end
def gitaly_backup_path
gitlab_config.dig(env, 'backup', 'gitaly_backup_path')
end
def gitaly_token
gitlab_config.dig(env, 'gitaly', 'token')
end
private
# Return the shared path used as a fallback base location to each blob type
......
......@@ -6,6 +6,8 @@ module Cli
module Errors
autoload :DatabaseBackupError, 'gitlab/backup/cli/errors/database_backup_error'
autoload :FileBackupError, 'gitlab/backup/cli/errors/file_backup_error'
autoload :FileRestoreError, 'gitlab/backup/cli/errors/file_restore_error'
autoload :GitalyBackupError, 'gitlab/backup/cli/errors/gitaly_backup_error'
end
end
end
......
# frozen_string_literal: true

module Gitlab
  module Backup
    module Cli
      module Errors
        # Raised when restoring a file-based backup target fails.
        #
        # Carries the diagnostic output (typically stderr of the restore
        # pipeline) so callers can surface it to the user.
        class FileRestoreError < StandardError
          # @return [String] diagnostic output captured from the failed restore
          attr_reader :error_message

          # @param error_message [String] diagnostic output of the failed restore
          def initialize(error_message:)
            @error_message = error_message
            # Pass the formatted message to StandardError so #to_s and
            # #full_message render it, instead of the inspected keyword hash
            # that a bare `super` would forward as the exception message.
            super(message)
          end

          # @return [String] human-readable failure description
          def message
            "Restore operation failed: #{error_message}"
          end
        end
      end
    end
  end
end
# frozen_string_literal: true

module Gitlab
  module Backup
    module Cli
      module Errors
        # Raised when gitaly-backup fails during a repository backup or restore.
        class GitalyBackupError < StandardError
          # @return [String] additional detail about the failure (may be empty)
          attr_reader :error_message

          # @param error_message [String] detail appended to the generic message
          def initialize(error_message = '')
            @error_message = error_message
            super(error_message)
          end

          # @return [String] human-readable failure description
          def message
            "Repository Backup/Restore failed. #{error_message}"
          end
        end
      end
    end
  end
end
# frozen_string_literal: true

module Gitlab
  module Backup
    module Cli
      # Enumerates the repository types handled during repository
      # backup/restore. The symbols are passed to the backup strategy
      # (e.g. GitalyBackup#enqueue) to select how storage and path
      # information is derived for each container.
      class RepoType
        PROJECT = :project
        WIKI = :wiki
        SNIPPET = :snippet
        DESIGN = :design

        # All known repository types, for iteration/validation by callers.
        ALL = [PROJECT, WIKI, SNIPPET, DESIGN].freeze
      end
    end
  end
end
......@@ -41,10 +41,6 @@ def execute
execute_all_tasks
end
def backup_options
@backup_options ||= build_backup_options!
end
def metadata
@metadata ||= read_metadata!
end
......@@ -57,14 +53,16 @@ def release!
private
def execute_all_tasks
# TODO: when we migrate targets to the new codebase, recreate options to have only what we need here
# https://gitlab.com/gitlab-org/gitlab/-/issues/454906
tasks = []
Gitlab::Backup::Cli::Tasks.build_each(context: context, options: backup_options) do |task|
Gitlab::Backup::Cli::Tasks.build_each(context: context) do |task|
# This is a temporary hack while we move away from options and use config instead
# This hack will be removed as part of https://gitlab.com/gitlab-org/gitlab/-/issues/498455
task.set_registry_bucket(registry_bucket) if task.is_a?(Gitlab::Backup::Cli::Tasks::Registry)
Gitlab::Backup::Cli::Output.info("Executing restoration of #{task.human_name}...")
duration = measure_duration do
tasks << { name: task.human_name, result: task.restore!(archive_directory, backup_id) }
tasks << { name: task.human_name, result: task.restore!(archive_directory) }
end
next if task.object_storage?
......@@ -87,15 +85,6 @@ def read_metadata!
@metadata = Gitlab::Backup::Cli::Metadata::BackupMetadata.load!(archive_directory)
end
def build_backup_options!
::Backup::Options.new(
backup_id: backup_id,
remote_directory: backup_bucket,
container_registry_bucket: registry_bucket,
service_account_file: service_account_file
)
end
# @return [Pathname] temporary directory
def create_temporary_workdir!
# Ensure base directory exists
......
......@@ -6,7 +6,11 @@ module Cli
module Targets
autoload :Target, 'gitlab/backup/cli/targets/target'
autoload :Database, 'gitlab/backup/cli/targets/database'
autoload :Files, 'gitlab/backup/cli/targets/files'
autoload :ObjectStorage, 'gitlab/backup/cli/targets/object_storage'
autoload :GitalyBackup, 'gitlab/backup/cli/targets/gitaly_backup'
autoload :GitalyClient, 'gitlab/backup/cli/targets/gitaly_client'
autoload :Repositories, 'gitlab/backup/cli/targets/repositories'
end
end
end
......
......@@ -17,14 +17,16 @@ class Database < Target
].freeze
IGNORED_ERRORS_REGEXP = Regexp.union(IGNORED_ERRORS).freeze
def initialize(options:)
super(options: options)
def initialize
@errors = []
@force = options.force?
# This flag will be removed as part of https://gitlab.com/gitlab-org/gitlab/-/issues/494209
# This option will be reintroduced as part of
# https://gitlab.com/gitlab-org/gitlab/-/issues/498453
@force = false
end
def dump(destination_dir, _)
def dump(destination_dir)
FileUtils.mkdir_p(destination_dir)
each_database(destination_dir) do |backup_connection|
......@@ -74,7 +76,7 @@ def dump(destination_dir, _)
end
end
def restore(destination_dir, _)
def restore(destination_dir)
@errors = []
base_models_for_backup.each do |database_name, _|
......
# frozen_string_literal: true

module Gitlab
  module Backup
    module Cli
      module Targets
        # Backs up and restores a directory of files as a compressed tar
        # archive, streaming through a shell pipeline (tar | compressor on
        # dump, decompressor | tar on restore).
        class Files < Target
          # Entries generally excluded from file backups.
          # NOTE(review): not referenced inside this class — presumably used
          # by callers when building the `excludes:` list; confirm before removing.
          DEFAULT_EXCLUDE = ['lost+found'].freeze

          # @return [Array] path entries excluded from the tar archive
          attr_reader :excludes

          # @param [String] storage_path
          # @param [Array] excludes
          def initialize(context, storage_path, excludes: [])
            super(context)

            @storage_path = storage_path
            @excludes = excludes
          end

          # Create a compressed tar archive of the storage directory.
          #
          # @param destination [String] path of the archive file to create
          # @raise [Errors::FileBackupError] when the pipeline fails
          def dump(destination)
            # [path, mode, permissions] tuple: the archive is written with 0600
            archive_file = [destination, 'w', 0o600]
            tar_command = Utils::Tar.new.pack_from_stdin_cmd(
              target_directory: storage_realpath,
              target: '.',
              excludes: excludes)

            compression_cmd = Utils::Compression.compression_command

            pipeline = Shell::Pipeline.new(tar_command, compression_cmd)
            result = pipeline.run!(output: archive_file)
            return if success?(result)

            raise Errors::FileBackupError.new(storage_realpath, destination)
          end

          # Restore the storage directory from a compressed tar archive.
          #
          # @param source [String, Pathname] path of the archive to extract
          # @raise [Errors::FileRestoreError] when the pipeline fails
          def restore(source)
            # Existing files will be handled in https://gitlab.com/gitlab-org/gitlab/-/issues/499876
            if File.exist?(storage_realpath)
              Output.warning "Ignoring existing files at #{storage_realpath} and continuing restore."
            end

            archive_file = source.to_s
            tar_command = Utils::Tar.new.extract_from_stdin_cmd(target_directory: storage_realpath)
            decompression_cmd = Utils::Compression.decompression_command

            pipeline = Shell::Pipeline.new(decompression_cmd, tar_command)
            result = pipeline.run!(input: archive_file)
            return if success?(result)

            raise Errors::FileRestoreError.new(error_message: result.stderr)
          end

          private

          # Whether the pipeline result should be treated as successful,
          # letting certain known-harmless tar failures through.
          #
          # NOTE(review): status_list[1] is the *second* command of the
          # pipeline — the compressor during dump but tar during restore —
          # while ignore_non_success? interprets it as a tar exit status.
          # Verify this index is correct for both pipeline directions.
          def success?(result)
            return true if result.success?

            return true if ignore_non_success?(
              result.status_list[1].exitstatus,
              result.stderr
            )

            false
          end

          # Matches a tar warning that is known to be non-critical.
          def noncritical_warning_matcher
            /^g?tar: \.: Cannot mkdir: No such file or directory$/
          end

          # tar can exit with nonzero code:
          #   1 - if some files changed (i.e. a CI job is currently writes to log)
          #   2 - if it cannot create `.` directory (see issue https://gitlab.com/gitlab-org/gitlab/-/issues/22442)
          # http://www.gnu.org/software/tar/manual/html_section/tar_19.html#Synopsis
          # so check tar status 1 or stderr output against some non-critical warnings
          def ignore_non_success?(exitstatus, output)
            if exitstatus == 1
              Output.print_info "Ignoring tar exit status 1 'Some files differ': #{output}"
              return true
            end

            # allow tar to fail with other non-success status if output contain non-critical warning
            if noncritical_warning_matcher&.match?(output)
              Output.print_info(
                "Ignoring non-success exit status #{exitstatus} due to output of non-critical warning(s): #{output}")
              return true
            end

            false
          end

          # Resolved (symlink-free) absolute path of the storage directory.
          def storage_realpath
            @storage_realpath ||= File.realpath(@storage_path)
          end
        end
      end
    end
  end
end
# frozen_string_literal: true

module Gitlab
  module Backup
    module Cli
      module Targets
        # Backup and restores repositories using gitaly-backup
        #
        # gitaly-backup can work in parallel and accepts a list of repositories
        # through input pipe using a specific json format for both backup and restore
        class GitalyBackup
          attr_reader :context

          def initialize(context)
            @context = context
          end

          # Spawn a gitaly-backup process ready to receive backup/restore jobs.
          #
          # @param type [Symbol] :create (backup) or :restore
          # @param backup_repos_path [String, Pathname] repository backup location
          # @param backup_id [String, nil] unique identifier for the backup
          # @param remove_all_repositories [Array<String>, nil] storage names whose
          #   repositories should be removed before restoring
          # @raise [Errors::GitalyBackupError] when a process is already running
          def start(type, backup_repos_path, backup_id: nil, remove_all_repositories: nil)
            raise Gitlab::Backup::Cli::Errors::GitalyBackupError, 'already started' if started?

            FileUtils.rm_rf(backup_repos_path) if type == :create

            @input_stream, stdout, @thread = Open3.popen2(
              build_env,
              bin_path,
              *gitaly_backup_args(type, backup_repos_path.to_s, backup_id, remove_all_repositories)
            )

            # Relay gitaly-backup output to our stdout as it is produced
            @out_reader = Thread.new do
              IO.copy_stream(stdout, $stdout)
            end
          end

          # Close the job pipe and wait for gitaly-backup to terminate.
          #
          # @raise [Errors::GitalyBackupError] when gitaly-backup exits non-zero
          def finish!
            return unless started?

            @input_stream.close
            # Also join the output relay thread so no diagnostics emitted during
            # shutdown are lost (it exits once the process closes its stdout);
            # previously this thread was never joined.
            @out_reader.join
            @thread.join
            status = @thread.value
            @thread = nil

            return unless status.exitstatus != 0

            raise Gitlab::Backup::Cli::Errors::GitalyBackupError,
              "gitaly-backup exit status #{status.exitstatus}"
          end

          # Schedule a repository backup/restore job for the running process.
          #
          # @param container [Object] project/wiki/snippet/design container
          # @param repo_type [Symbol] one of the RepoType constants
          # @raise [Errors::GitalyBackupError] when not started or container is nil
          def enqueue(container, repo_type)
            raise Gitlab::Backup::Cli::Errors::GitalyBackupError, 'not started' unless started?
            raise Gitlab::Backup::Cli::Errors::GitalyBackupError, 'no container for repo type' unless container

            storage, relative_path, gl_project_path, always_create = repository_info_for(container, repo_type)

            schedule_backup_job(storage, relative_path, gl_project_path, always_create)
          end

          private

          # Derive [storage, relative_path, gl_project_path, always_create]
          # for the given container according to its repository type.
          # Returns nil for an unknown repo_type.
          def repository_info_for(container, repo_type)
            case repo_type
            when RepoType::PROJECT
              [container.repository_storage,
                container.disk_path || container.full_path,
                container.full_path,
                true]
            when RepoType::WIKI
              wiki_repo_info(container)
            when RepoType::SNIPPET
              [container.repository_storage,
                container.disk_path || container.full_path,
                container.full_path,
                false]
            when RepoType::DESIGN
              [design_repo_storage(container),
                container.project.disk_path,
                container.project.full_path,
                false]
            end
          end

          def design_repo_storage(container)
            return container.repository.repository_storage if container.repository.respond_to?(:repository_storage)

            container.repository_storage
          end

          def wiki_repo_info(container)
            wiki = container.respond_to?(:wiki) ? container.wiki : container
            [wiki.repository_storage,
              wiki.disk_path || wiki.full_path,
              wiki.full_path,
              false]
          end

          # Build the gitaly-backup CLI arguments for the given operation.
          #
          # @raise [Errors::GitalyBackupError] on an unknown backup type
          def gitaly_backup_args(type, backup_repos_path, backup_id, remove_all_repositories)
            command = case type
                      when :create
                        'create'
                      when :restore
                        'restore'
                      else
                        raise Gitlab::Backup::Cli::Errors::GitalyBackupError, "unknown backup type: #{type}"
                      end

            args = [command] + ['-path', backup_repos_path, '-layout', 'manifest']

            case type
            when :create
              args += ['-id', backup_id] if backup_id
            when :restore
              args += ['-remove-all-repositories', remove_all_repositories.join(',')] if remove_all_repositories
              args += ['-id', backup_id] if backup_id
            end

            args
          end

          # Schedule a new backup job through a non-blocking JSON based pipe protocol
          #
          # @see https://gitlab.com/gitlab-org/gitaly/-/blob/master/doc/gitaly-backup.md
          def schedule_backup_job(storage, relative_path, gl_project_path, always_create)
            json_job = {
              storage_name: storage,
              relative_path: relative_path,
              gl_project_path: gl_project_path,
              always_create: always_create
            }.to_json

            @input_stream.puts(json_job)
          end

          # Connection data (address/token) for every configured storage.
          #
          # @raise [Errors::GitalyBackupError] when no storages are configured
          def gitaly_servers
            storages = context.config_repositories_storages

            # The previous guard (`unless storages.keys`) could never fire:
            # Hash#keys is always truthy. Check for a missing/empty config instead.
            if storages.nil? || storages.empty?
              raise Gitlab::Backup::Cli::Errors::GitalyBackupError,
                "No repositories' storages found."
            end

            # The client does not depend on the storage name, so build it once
            client = GitalyClient.new(storages, context.gitaly_token)

            storages.keys.index_with do |storage_name|
              client.connection_data(storage_name)
            end
          end

          def gitaly_servers_encoded
            Base64.strict_encode64(JSON.dump(gitaly_servers))
          end

          # These variables will be moved to a config file via
          # https://gitlab.com/gitlab-org/gitlab/-/issues/500437
          def default_cert_dir
            ENV.fetch('SSL_CERT_DIR', OpenSSL::X509::DEFAULT_CERT_DIR)
          end

          def default_cert_file
            ENV.fetch('SSL_CERT_FILE', OpenSSL::X509::DEFAULT_CERT_FILE)
          end

          # Environment for the gitaly-backup process. Values already present
          # in the current ENV take precedence over the defaults computed here.
          def build_env
            {
              'SSL_CERT_FILE' => default_cert_file,
              'SSL_CERT_DIR' => default_cert_dir,
              'GITALY_SERVERS' => gitaly_servers_encoded
            }.merge(current_env)
          end

          def current_env
            ENV
          end

          def started?
            @thread.present?
          end

          # Absolute path to the gitaly-backup binary.
          #
          # @raise [Errors::GitalyBackupError] when gitaly_backup_path is not configured
          def bin_path
            unless context.gitaly_backup_path.present?
              raise Gitlab::Backup::Cli::Errors::GitalyBackupError,
                'gitaly-backup binary not found and gitaly_backup_path is not configured'
            end

            File.absolute_path(context.gitaly_backup_path)
          end
        end
      end
    end
  end
end
# frozen_string_literal: true

module Gitlab
  module Backup
    module Cli
      module Targets
        # Resolves per-storage Gitaly connection details (address and token)
        # from the repositories storages configuration.
        class GitalyClient
          attr_reader :storages, :gitaly_token

          # @param storages [Hash] storage name => storage settings
          # @param gitaly_token [String, nil] fallback token shared by all storages
          def initialize(storages, gitaly_token)
            @storages = storages
            @gitaly_token = gitaly_token
          end

          # @param storage [String] storage name to look up
          # @return [Hash] 'address' and 'token' entries for the given storage
          # @raise [RuntimeError] when the storage is unknown or misconfigured
          def connection_data(storage)
            raise "storage not found: #{storage.inspect}" if storages[storage].nil?

            { 'address' => address(storage), 'token' => token(storage) }
          end

          private

          # Validated gitaly_address for the storage; only tcp/unix/tls/dns
          # URL schemes are accepted.
          def address(storage)
            configured_address = storages[storage]['gitaly_address']

            raise "storage #{storage.inspect} is missing a gitaly_address" unless configured_address.present?

            scheme = URI(configured_address).scheme
            unless %w[tcp unix tls dns].include?(scheme)
              raise "Unsupported Gitaly address: " \
                "#{configured_address.inspect} does not use URL scheme 'tcp' or 'unix' or 'tls' or 'dns'"
            end

            configured_address
          end

          # Per-storage token when configured (and non-blank), otherwise the
          # shared fallback token.
          def token(storage)
            per_storage_token = storages[storage]['gitaly_token']

            per_storage_token.presence || gitaly_token
          end
        end
      end
    end
  end
end
......@@ -12,14 +12,15 @@ class Google < Target
attr_accessor :object_type, :backup_bucket, :client, :config, :results
def initialize(object_type, options, config)
def initialize(object_type, remote_directory, config)
@object_type = object_type
@backup_bucket = options.remote_directory
@backup_bucket = remote_directory
@config = config
@client = ::Google::Cloud::StorageTransfer.storage_transfer_service
end
def dump(_, backup_id)
# @param [String] backup_id unique identifier for the backup
def dump(backup_id)
response = find_or_create_job(backup_id, "backup")
run_request = {
project_id: backup_job_spec(backup_id)[:project_id],
......@@ -28,7 +29,8 @@ def dump(_, backup_id)
@results = client.run_transfer_job run_request
end
def restore(_, backup_id)
# @param [String] backup_id unique identifier for the backup
def restore(backup_id)
response = find_or_create_job(backup_id, "restore")
run_request = {
project_id: restore_job_spec(backup_id)[:project_id],
......
# frozen_string_literal: true

require 'yaml'

module Gitlab
  module Backup
    module Cli
      module Targets
        # Backup and restores repositories by querying the database
        class Repositories < Target
          # Back up every project, wiki, design and snippet repository into
          # +destination+ via the gitaly-backup strategy.
          def dump(destination)
            strategy.start(:create, destination)
            enqueue_all_repositories
          ensure
            strategy.finish!
          end

          # Restore all repositories from +source+, removing repositories in
          # all configured storages first, then reset object pools.
          def restore(source)
            strategy.start(:restore, source, remove_all_repositories: remove_all_repositories)
            enqueue_all_repositories
          ensure
            strategy.finish!
            restore_object_pools
          end

          # Memoized gitaly-backup strategy shared by dump and restore.
          def strategy
            @strategy ||= GitalyBackup.new(context)
          end

          private

          # Storage names whose repositories are wiped before restoring.
          def remove_all_repositories
            context.config_repositories_storages.keys
          end

          # Feed every repository to the strategy: projects (with their wiki
          # and design repos) first, then snippets, in database batches.
          def enqueue_all_repositories
            project_relation.find_each(batch_size: 1000) { |project| enqueue_project(project) }
            snippet_relation.find_each(batch_size: 1000) { |snippet| enqueue_snippet(snippet) }
          end

          def enqueue_project(project)
            strategy.enqueue(project, Gitlab::Backup::Cli::RepoType::PROJECT)
            strategy.enqueue(project, Gitlab::Backup::Cli::RepoType::WIKI)

            design_repo = project.design_management_repository
            return unless design_repo

            strategy.enqueue(design_repo, Gitlab::Backup::Cli::RepoType::DESIGN)
          end

          def enqueue_snippet(snippet)
            strategy.enqueue(snippet, Gitlab::Backup::Cli::RepoType::SNIPPET)
          end

          def project_relation
            Project.includes(:route, :group, :namespace)
          end

          def snippet_relation
            Snippet.all
          end

          # Reset object pools so they are rebuilt from their source projects.
          def restore_object_pools
            PoolRepository.includes(:source_project).find_each do |pool|
              Output.info " - Object pool #{pool.disk_path}..."

              unless pool.source_project
                Output.info " - Object pool #{pool.disk_path}... [SKIPPED]"
                next
              end

              pool.state = 'none'
              pool.save

              pool.schedule
            end
          end
        end
      end
    end
  end
end
......@@ -6,14 +6,10 @@ module Cli
module Targets
# Abstract class used to implement a Backup Target
class Target
# Backup creation and restore option flags
#
# TODO: Migrate to a unified backup specific Options implementation
# @return [::Backup::Options]
attr_reader :options
attr_reader :context
def initialize(options:)
@options = options
def initialize(context = nil)
@context = context
end
def asynchronous?
......@@ -23,13 +19,12 @@ def asynchronous?
# dump task backup to `path`
#
# @param [String] path fully qualified backup task destination
# @param [String] backup_id unique identifier for the backup
def dump(path, backup_id)
def dump(path)
raise NotImplementedError
end
# restore task backup from `path`
def restore(path, backup_id)
def restore(path)
raise NotImplementedError
end
end
......
......@@ -14,7 +14,7 @@ def destination_path = 'artifacts.tar.gz'
private
def local
::Backup::Targets::Files.new(nil, storage_path, options: options, excludes: ['tmp'])
Gitlab::Backup::Cli::Targets::Files.new(context, storage_path, excludes: ['tmp'])
end
def storage_path = context.ci_job_artifacts_path
......
......@@ -14,7 +14,7 @@ def destination_path = 'builds.tar.gz'
private
def target
::Backup::Targets::Files.new(nil, storage_path, options: options)
Gitlab::Backup::Cli::Targets::Files.new(context, storage_path)
end
def storage_path = context.ci_builds_path
......
......@@ -14,7 +14,7 @@ def destination_path = 'ci_secure_files.tar.gz'
private
def local
::Backup::Targets::Files.new(nil, storage_path, options: options, excludes: ['tmp'])
Gitlab::Backup::Cli::Targets::Files.new(context, storage_path, excludes: ['tmp'])
end
def storage_path = context.ci_secure_files_path
......
......@@ -16,7 +16,7 @@ def cleanup_path = 'db'
private
def target
::Gitlab::Backup::Cli::Targets::Database.new(options: options)
::Gitlab::Backup::Cli::Targets::Database.new
end
end
end
......
0% 加载中 .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册