#!/bin/bash
ACTION=""
export BACKUP_BUCKET_NAME=${BACKUP_BUCKET_NAME-gitlab-backups}
export AWS_KMS_SETTINGS=""
export AWS_S3_SETTINGS=""
export AZURE_CONFIG_FILE="/etc/gitlab/objectstorage/azure_config"
AWS_KMS_SETTINGS_LIST=()
AWS_S3_SETTINGS_LIST=()
skipped_via_flag=()
start_timestamp=$(date +%s)
backups_path=$rails_dir/tmp/backups
backup_tars_path=$rails_dir/tmp/backup_tars
object_storage_backends=( registry uploads artifacts lfs packages external_diffs terraform_state pages ci_secure_files )
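# Each entry above is handled by the object-storage-backup/object-storage-restore
# helpers invoked further down.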
function usage(){
cat << EOF
Usage: backup-utility [--restore|--cleanup] [-f URL] [-t TIMESTAMP] [--skip COMPONENT] [--backend BACKEND] [--s3config CONFIG]
Options:
-h, --help Show this help message and exit.
--restore [-t TIMESTAMP | -f URL] When specified, utility restores from an existing backup specified
as url or timestamp in object storage.
-f URL http(s)://, ftp://, or file:/// URL with the backup location. Use with --restore.
-t TIMESTAMP Timestamp (part before '_gitlab_backup.tar' in archive name),
can be used to specify backup source or target name.
--rsyncable Pass the '--rsyncable' parameter to gzip for artifact compression.
--skip COMPONENT When specified, utility will skip the backup or restore of COMPONENT.
May be defined multiple times. Valid values for COMPONENT are
db, repositories, and any of the object storages (e.g. 'lfs').
--backend BACKEND Object storage backend to use for backups.
--s3config CONFIG S3 backend configuration to use for backups storage.
Special config file for s3cmd (see: https://s3tools.org/usage)
Not required when using the awscli tool.
--s3tool TOOL S3 CLI tool to use. Can be either 's3cmd' or 'awscli'.
--storage-class CLASSNAME Pass this storage class to the gcs, s3cmd, aws, or azcopy cli for more
cost-efficient storage of backups.
--maximum-backups N Only keep the most recent N number of backups, deleting others after success.
Requires s3config or AWS credentials to be able to list and delete objects.
--cleanup Run the backup cleanup without creating a new backup. Can be used with the
'maximum-backups' option to clean old remote backups.
--aws-kms-key-id Add KMS key id when S3 bucket is encrypted with a customer key.
--aws-s3-endpoint-url Specify an AWS S3 endpoint URL.
--aws-region Add AWS region (required for AWS STS regionalized endpoint).
--azure-config-file Path of the config file to configure Azure Block Storage access.
--repositories-server-side When Gitaly is configured appropriately, the utility has Gitaly back up
repositories directly to object storage.
--skip-restore-prompt Skip prompts during the restore process.
--skip-cleanup Skip cleanup after the backup process.
EOF
}
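# Illustrative invocations (the timestamp below is made up):
#   backup-utility                                            # create a backup with the configured backend
#   backup-utility --restore -t 1693000000_2023_08_26_16.3.0  # restore by timestamp from object storage
#   backup-utility --cleanup --maximum-backups 7              # prune old remote backups without a new backup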
# Checks whether the provided argument is a URL we can download from
function is_url() {
regex='(https?|ftp|file)://[-A-Za-z0-9\+&@#/%?=~_|!:,.;]*[-A-Za-z0-9\+&@#/%=~_|]'
[[ $1 =~ $regex ]]
}
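# For example, is_url succeeds for "https://example.com/backup.tar" and fails for
# a bare timestamp such as "1693000000_2023_08_26_16.3.0" (illustrative values),
# which is how fetch_remote_backup below tells URLs and timestamps apart.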
# Fetches a backup archive either from a URL or, given a timestamp, from the
# configured object storage backend, then prints the local path it was saved to.
function fetch_remote_backup(){
mkdir -p $backups_path
local output_path=$backups_path/0_gitlab_backup.tar
if is_url $1; then
>&2 echo "Downloading from $1";
curl -f --retry 6 --progress-bar -o $output_path $1
else # It's a timestamp
file_name="$1_gitlab_backup.tar"
case "$BACKUP_BACKEND" in
s3)
case "$S3_TOOL" in
s3cmd) s3cmd ${S3_CMD_BACKUP_OPTION} get "s3://$BACKUP_BUCKET_NAME/$file_name" $output_path > /dev/null ;;
awscli) aws s3 cp "s3://$BACKUP_BUCKET_NAME/$file_name" $output_path ${AWS_S3_SETTINGS_LIST[@]} ${AWS_KMS_SETTINGS_LIST[@]} > /dev/null ;;
*) echo "Unknown S3 tool: ${S3_TOOL}" ;;
esac
;;
gcs) gsutil cp "gs://$BACKUP_BUCKET_NAME/$file_name" $output_path > /dev/null ;;
azure) azcopy copy "$(get_azure_url)/${BACKUP_BUCKET_NAME}/${file_name}?$(get_azure_token)" ${output_path} --output-level quiet ;;
*) echo "Unknown backend: ${BACKUP_BACKEND}" ;;
esac
fi
echo $output_path
}
function unpack_backup(){
local file_path=$1
cd $(dirname $file_path)
echo "Unpacking backup"
if [ ! -f $file_path ]; then
echo $file_path not found
exit 1
fi
tar -xf $file_path
}
function pack_backup(){
echo "Packing up backup tar"
local backup_name=$1
tar -cf ${backup_tars_path}/${backup_name}.tar -C $backups_path .
}
function get_version(){
cat $rails_dir/VERSION
}
function get_azure_url(){
echo -n $(object-storage-azure-url)
}
function get_azure_token(){
echo -n $(object-storage-azure-token)
}
function get_backup_id(){
if [ -n "$BACKUP_TIMESTAMP" ]; then
echo -n ${BACKUP_TIMESTAMP}
elif [ -n "$BACKUP" ]; then
echo -n ${BACKUP}
else
local timestamp=$(date --date="@$start_timestamp" +%s_%Y_%m_%d)
local gitlab_version=$(get_version)
echo -n ${timestamp}_${gitlab_version}
fi
}
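# A freshly generated ID has the form <epoch>_<YYYY>_<MM>_<DD>_<version>,
# e.g. 1693000000_2023_08_26_16.3.0 (illustrative).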
function get_backup_name(){
echo -n "$(get_backup_id)_gitlab_backup"
}
function get_existing_backups(){
# This will only match backups following the same naming convention as the ones
# generated by this script, e.g. TIMESTAMP_YYYY_MM_DD_VERSION_gitlab_backup.tar
case "$BACKUP_BACKEND" in
s3)
case "$S3_TOOL" in
s3cmd)
existing_backups=($(s3cmd ${S3_CMD_BACKUP_OPTION} ls s3://$BACKUP_BUCKET_NAME --rinclude '^\d{10}_\d{4}_\d{2}_\d{2}_.+_gitlab_backup.tar$' | awk '{print $4}' | LC_ALL=C sort))
;;
awscli)
existing_backups=($(aws s3 ls ${AWS_S3_SETTINGS_LIST[@]} "s3://$BACKUP_BUCKET_NAME" | awk '{print $4}' | grep -E '^[0-9]{10}_[0-9]{4}_[0-9]{2}_[0-9]{2}_.+_gitlab_backup.tar$' | LC_ALL=C sort))
;;
*)
echo "Unknown S3 tool: ${S3_TOOL}"
exit 1
;;
esac
;;
gcs)
# Note: gsutil doesn't support regex, so we match the prefix as best we can with wildcards
# https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames#other-wildcard-characters
existing_backups=($(gsutil ls gs://$BACKUP_BUCKET_NAME/[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_[0-9][0-9][0-9][0-9]_[0-9][0-9]_[0-9][0-9]_\*_gitlab_backup.tar | LC_ALL=C sort))
;;
azure)
existing_backups=($(azcopy list "$(get_azure_url)/${BACKUP_BUCKET_NAME}?$(get_azure_token)" --output-type=json \
| jq -sr '.[] | select(.MessageType=="ListObject") | .MessageContent | fromjson | select(.Path|test("^[0-9]{10}_[0-9]{4}_[0-9]{2}_[0-9]{2}_.+_gitlab_backup.tar$")) | .Path' \
| LC_ALL=C sort))
;;
*)
echo "Unknown backend for backup: ${BACKUP_BACKEND}"
exit 1
;;
esac
}
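# Every generated backup name starts with a 10-digit epoch timestamp, so the
# LC_ALL=C sort above yields oldest-first ordering; cleanup() relies on this
# when it deletes from the front of the existing_backups array.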
function remove_backup(){
local backup_to_remove=$1
case "$BACKUP_BACKEND" in
s3)
case "$S3_TOOL" in
s3cmd) s3cmd ${S3_CMD_BACKUP_OPTION} del ${backup_to_remove} > /dev/null ;;
awscli) aws s3 rm ${AWS_S3_SETTINGS_LIST[@]} "s3://${BACKUP_BUCKET_NAME}/${backup_to_remove}" > /dev/null ;;
*)
echo "Unknown S3 tool: $S3_TOOL"
exit 1
;;
esac
;;
gcs) gsutil rm ${backup_to_remove} > /dev/null ;;
azure) azcopy remove "$(get_azure_url)/${BACKUP_BUCKET_NAME}/${backup_to_remove}?$(get_azure_token)" --output-level essential ;;
*)
echo "Unknown backend for backup: ${BACKUP_BACKEND}"
exit 1
;;
esac
}
function cleanup(){
rm -rf $backup_tars_path/*
if [ -n "$MAXIMUM_BACKUPS" ]; then
get_existing_backups
echo "Found ${#existing_backups[@]} existing backups. Maximum allowed is $MAXIMUM_BACKUPS"
if [ ${#existing_backups[@]} -gt $MAXIMUM_BACKUPS ]; then
i=0
while [ $i -lt $(expr ${#existing_backups[@]} - $MAXIMUM_BACKUPS) ]; do
echo "Deleting old backup ${existing_backups[$i]}"
remove_backup ${existing_backups[$i]}
((++i))
done
fi
echo "[DONE] Finished pruning old backups"
fi
}
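# For example, with MAXIMUM_BACKUPS=3 and five existing backups, the loop above
# runs for indices 0 and 1, removing the two oldest archives.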
# Writes the metadata file that restore() later reads back out of the archive.
# NOTE: the function name is an assumption; it only needs to match the call in backup() below.
function write_backup_information(){
cat << EOF > $backups_path/backup_information.yml
:db_version: $(gitlab-rails runner "File.write('/tmp/db_version', ActiveRecord::Migrator.current_version.to_s)" && cat /tmp/db_version)
:backup_created_at: $(date --date="@$start_timestamp" "+%Y-%m-%d %H:%M:%S %z")
:gitlab_version: $(get_version)
:tar_version: $(tar --version | head -n 1)
:installation_type: gitlab-helm-chart
:skipped: $(get_skipped)
:repositories_server_side: $(are_repositories_server_side)
EOF
}
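# A rendered backup_information.yml looks roughly like this (values illustrative):
#   :db_version: 20230822000000
#   :backup_created_at: 2023-08-26 12:00:00 +0000
#   :gitlab_version: 16.3.0
#   :tar_version: tar (GNU tar) 1.34
#   :installation_type: gitlab-helm-chart
#   :skipped: db,lfs
#   :repositories_server_side: false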
# Builds the comma-separated skipped-components list: components skipped via
# --skip plus any expected archives missing from $backups_path.
# NOTE: the name get_skipped is an assumption; it matches the :skipped: line written above.
function get_skipped(){
local skipped_string=""
all=( builds.tar.gz pages.tar.gz )
if [[ ${skipped_via_flag[@]} =~ "db" ]]; then
skipped_string="$skipped_string,db";
fi
if [[ ${skipped_via_flag[@]} =~ "repositories" ]]; then
skipped_string="$skipped_string,repositories";
fi
for backup_item in ${all[@]}; do
if [ ! -e $backups_path/$backup_item ]; then
skipped_string="$skipped_string,${backup_item%.tar.gz}";
fi;
done;
echo ${skipped_string#,}
}
function are_repositories_server_side() {
if [ "$REPOSITORIES_SERVER_SIDE" = true ]; then
echo 'true'
else
echo 'false'
fi
}
function backup(){
export BACKUP=$(get_backup_id)
rm -rf $backup_tars_path $backups_path && mkdir -p $backup_tars_path $backups_path
if ! [[ ${skipped_via_flag[@]} =~ "db" ]]; then
gitlab-rake gitlab:backup:db:create
fi
if ! [[ ${skipped_via_flag[@]} =~ "repositories" ]]; then
gitlab-rake gitlab:backup:repo:create
fi
for backup_item in ${object_storage_backends[@]}; do
if ! [[ ${skipped_via_flag[@]} =~ $backup_item ]]; then
object-storage-backup $backup_item $backups_path/${backup_item}.tar.gz
fi
done
# Write the metadata file, then pack everything under $backups_path into a
# single tar before uploading it to the configured backend.
write_backup_information
local backup_name=$(get_backup_name)
pack_backup ${backup_name}
case "$BACKUP_BACKEND" in
s3)
case "$S3_TOOL" in
s3cmd)
if [ -z "${STORAGE_CLASS}" ]; then
s3cmd ${S3_CMD_BACKUP_OPTION} put \
"${backup_tars_path}/${backup_name}.tar" "s3://$BACKUP_BUCKET_NAME" > /dev/null
else
s3cmd ${S3_CMD_BACKUP_OPTION} put --storage-class "${STORAGE_CLASS}" \
"${backup_tars_path}/${backup_name}.tar" "s3://$BACKUP_BUCKET_NAME" > /dev/null
fi
;;
awscli)
if [ -z "${STORAGE_CLASS}" ]; then
aws s3 cp ${AWS_S3_SETTINGS_LIST[@]} ${AWS_KMS_SETTINGS_LIST[@]} \
"${backup_tars_path}/${backup_name}.tar" "s3://$BACKUP_BUCKET_NAME" > /dev/null
else
aws s3 cp --storage-class "${STORAGE_CLASS}" ${AWS_S3_SETTINGS_LIST[@]} ${AWS_KMS_SETTINGS_LIST[@]} \
"${backup_tars_path}/${backup_name}.tar" "s3://$BACKUP_BUCKET_NAME" > /dev/null
fi
;;
*) echo "Unknown S3 tool: $S3_TOOL" ;;
esac
echo "[DONE] Backup can be found at s3://$BACKUP_BUCKET_NAME/${backup_name}.tar"
;;
gcs)
if [ -z "${STORAGE_CLASS}" ]; then
gsutil cp -n ${backup_tars_path}/${backup_name}.tar gs://$BACKUP_BUCKET_NAME > /dev/null
else
gsutil cp -s "${STORAGE_CLASS}" -n ${backup_tars_path}/${backup_name}.tar gs://$BACKUP_BUCKET_NAME > /dev/null
fi
echo "[DONE] Backup can be found at gs://$BACKUP_BUCKET_NAME/${backup_name}.tar"
;;
azure)
if [ -z "${STORAGE_CLASS}" ]; then
azcopy copy "${backup_tars_path}/${backup_name}.tar" --output-level essential \
"$(get_azure_url)/${BACKUP_BUCKET_NAME}/${backup_name}.tar?$(get_azure_token)"
else
azcopy copy --block-blob-tier "${STORAGE_CLASS}" --output-level essential \
"${backup_tars_path}/${backup_name}.tar" "$(get_azure_url)/${BACKUP_BUCKET_NAME}/${backup_name}.tar?$(get_azure_token)"
fi
echo "[DONE] Backup can be found at ${AZURE_BASE_URL}/${BACKUP_BUCKET_NAME}/${backup_name}.tar"
;;
*) echo "Unknown backend for backup: ${BACKUP_BACKEND}" ;;
esac
if ! [ "$SKIP_CLEANUP" = true ]; then
cleanup
fi
}
# Returns success when $1 appears as a whole token in $SKIPPED.
function is_skipped(){
[[ $SKIPPED =~ (^|[^[:alnum:]])$1([^[:alnum:]]|$) ]]
}
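# e.g. with SKIPPED="db,lfs", `is_skipped db` succeeds while `is_skipped artifacts`
# fails; the boundary groups keep "db" from matching inside a longer component name.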
if [ -z "$BACKUP_URL" ] && [ -z "$BACKUP_TIMESTAMP" ]; then
echo "You need to set BACKUP_URL or BACKUP_TIMESTAMP variable"
local backup_remote=${BACKUP_URL:-$BACKUP_TIMESTAMP}
local backup_basename=$(basename "$backup_remote")
local backup_id=${backup_basename%_gitlab_backup.tar}
export BACKUP=$backup_id
file=$(fetch_remote_backup $backup_remote)
unpack_backup $file
skipped_line=$(grep skipped $(dirname $file)/backup_information.yml)
export SKIPPED=$(echo ${skipped_line#:skipped:},$(IFS=,; echo -n "${skipped_via_flag[*]}"))
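# e.g. if backup_information.yml recorded ":skipped: db" and --skip lfs was
# passed on the command line, SKIPPED becomes "db,lfs".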
installation_type_line=$(grep installation_type $(dirname $file)/backup_information.yml || echo ":installation_type: unknown")
export INSTALLATION_TYPE=$(echo ${installation_type_line#:installation_type: })
! is_skipped "db" && gitlab-rake gitlab:backup:db:restore
! is_skipped "repositories" && gitlab-rake gitlab:backup:repo:restore
! is_skipped "builds" && gitlab-rake gitlab:backup:builds:restore
if [ "$INSTALLATION_TYPE" = "gitlab-helm-chart" ]; then
for restore_item in ${object_storage_backends[@]}; do
if [ -f $backups_path/${restore_item}.tar.gz ]; then
! is_skipped $restore_item && object-storage-restore $restore_item $backups_path/${restore_item}.tar.gz
fi
done
else
echo "Backup tarball not from a Helm chart based installation. Not processing files in object storage."
fi
}
# Parse command-line arguments
while [[ $# -gt 0 ]]; do
key="$1"
case $key in
-f)
BACKUP_URL="$2"
shift
shift
;;
-t)
BACKUP_TIMESTAMP="$2"
shift
shift
;;
--backend)
export BACKUP_BACKEND="$2"
shift
shift
;;
--s3tool)
export S3_TOOL="$2"
shift
shift
;;
--s3config)
if [ ! -f "$2" ]; then
echo "s3cmd config file specified does not exist";
exit 1;
fi
export S3_CMD_BACKUP_OPTION="--config=$2"
shift
shift
;;
if [ -z "$ACTION" ]; then
ACTION="restore"
echo "Only one action at a time is supported"
exit 1
fi
--rsyncable)
export GZIP_RSYNCABLE="yes"
shift
;;
--skip)
skipped_via_flag+=("$2")
shift
shift
;;
--storage-class)
export STORAGE_CLASS="$2"
shift
shift
;;
--maximum-backups)
export MAXIMUM_BACKUPS="$2"
if ! [[ $MAXIMUM_BACKUPS =~ ^[0-9]+$ ]]; then
echo "Value specified for --maximum-backups must be a non-negative integer. Got: ${MAXIMUM_BACKUPS}"
exit 1
fi
shift
shift
;;
--cleanup)
if [ -z "$ACTION" ]; then
ACTION="cleanup"
echo "Only one action at a time is supported"
exit 1
fi
shift
;;
--aws-kms-key-id)
AWS_KMS_SETTINGS_LIST+=(--sse aws:kms --sse-kms-key-id "$2")
shift
shift
;;
--aws-s3-endpoint-url)
AWS_S3_SETTINGS_LIST+=(--endpoint-url "$2")
shift
shift
;;
--aws-region)
AWS_REGION=$2
shift
shift
;;
--azure-config-file)
export AZURE_CONFIG_FILE=$2
shift
shift
;;
--repositories-server-side)
export REPOSITORIES_SERVER_SIDE="true"
shift
;;
--skip-restore-prompt)
export GITLAB_ASSUME_YES=1
shift
;;
--skip-cleanup)
export SKIP_CLEANUP="true"
shift
;;
*)
usage
echo "Unexpected parameter: $key"
exit 1
;;
esac
done
# AWS_REGION can be set as an environment variable and can be overwritten by the --aws-region argument
if [ -n "$AWS_REGION" ]; then
AWS_S3_SETTINGS_LIST+=(--region ${AWS_REGION})
fi
# These variables will be used by the ruby scripts
AWS_KMS_SETTINGS="${AWS_KMS_SETTINGS_LIST[*]}"
AWS_S3_SETTINGS="${AWS_S3_SETTINGS_LIST[*]}"
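# e.g. --aws-region us-east-1 with --aws-s3-endpoint-url https://s3.example.test
# (illustrative values) yields AWS_S3_SETTINGS="--endpoint-url https://s3.example.test --region us-east-1".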
if [ "$ACTION" = "restore" ]; then
elif [ "$ACTION" = "cleanup" ]; then
cleanup
elif [ -z "$ACTION" ]; then
ACTION="backup"
backup