#!/bin/bash
export BACKUP_BUCKET_NAME=${BACKUP_BUCKET_NAME-gitlab-backups}
export BACKUP_BACKEND=${BACKUP_BACKEND-s3}
export S3_TOOL=${S3_TOOL-s3cmd}
export AWS_KMS_SETTINGS=""
export AWS_S3_SETTINGS=""
export AZURE_CONFIG_FILE="/etc/gitlab/objectstorage/azure_config"
AWS_KMS_SETTINGS_LIST=()
AWS_S3_SETTINGS_LIST=()
start_timestamp=$(date +%s)
rails_dir=/srv/gitlab
backups_path=$rails_dir/tmp/backups
backup_tars_path=$rails_dir/tmp/backup_tars
object_storage_backends=( registry uploads artifacts lfs packages external_diffs terraform_state pages ci_secure_files )
skipped_via_flag=()

function usage()
{
  cat << HEREDOC
   Usage: backup-utility [--restore|--cleanup] [-f URL] [-t TIMESTAMP] [--skip COMPONENT] [--backend BACKEND] [--s3config CONFIG]

   Options:
     -h, --help                             Show this help message and exit.
     --restore [-t TIMESTAMP | -f URL]      When specified, utility restores from an existing backup specified
                                            as url or timestamp in object storage.
     -f URL                                 http(s)://, ftp://, or file:/// URL with the backup location. Use with --restore.
     -t TIMESTAMP                           Timestamp (part before '_gitlab_backup.tar' in archive name),
                                            can be used to specify backup source or target name.
     --rsyncable                            Pass the '--rsyncable' parameter to gzip for artifact compression.
     --skip COMPONENT                       When specified, utility will skip the backup or restore of COMPONENT.
                                            May be defined multiple times. Valid values for COMPONENT are
                                            db, repositories, and any of the object storages (e.g. 'lfs').
     --backend BACKEND                      Object storage backend to use for backups.
                                            Can be either 's3', 'gcs', or 'azure'.
     --s3config CONFIG                      S3 backend configuration to use for backups storage.
                                            Special config file for s3cmd (see: https://s3tools.org/usage)
                                            Not required when using the awscli tool.
     --s3tool TOOL                          S3 CLI tool to use. Can be either 's3cmd' or 'awscli'.
     --storage-class CLASSNAME              Pass this storage class to the gcs, s3cmd, aws, or azcopy CLI for more
                                            cost-efficient storage of backups.
     --maximum-backups N                    Only keep the most recent N number of backups, deleting others after success.
                                            Requires s3config or AWS credentials to be able to list and delete objects.
     --cleanup                              Run the backup cleanup without creating a new backup. Can be used with the
                                            'maximum-backups' option to clean old remote backups.
     --aws-kms-key-id                       Add the KMS key ID when the S3 bucket is encrypted with a customer-managed key.
     --aws-s3-endpoint-url                  Specify an AWS S3 endpoint URL.
     --aws-region                           Add AWS region (required for AWS STS regionalized endpoint).
     --azure-config-file                    Path of the config file to configure Azure Block Storage access.
     --repositories-server-side             When Gitaly is configured appropriately, the utility will have Gitaly back up
                                            repositories directly to object storage.
     --skip-restore-prompt                  Skip prompts during the restore process.
     --skip-cleanup                         Skip the cleanup step after the backup process.
HEREDOC
}
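
# Illustrative invocations (bucket names and timestamps below are hypothetical):
#   backup-utility                                     # create a backup using the default s3 backend
#   backup-utility --backend gcs --maximum-backups 7   # back up to GCS, then prune to the 7 newest
#   backup-utility --restore -t 1693459200_2023_08_31_16.2.3
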
# Checks whether the provided argument is a URL that can be downloaded directly
function is_url() {
  regex='(https?|ftp|file)://[-A-Za-z0-9\+&@#/%?=~_|!:,.;]*[-A-Za-z0-9\+&@#/%=~_|]'

  [[ $1 =~ $regex ]]
}
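
# For example (hypothetical inputs): is_url "https://example.com/backup.tar" succeeds,
# while is_url "1693459200_2023_08_31_16.2.3" fails and is treated as a timestamp.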

function fetch_remote_backup(){
  mkdir -p $backups_path
  output_path=$backups_path/0_gitlab_backup.tar
  if is_url $1; then
    >&2 echo "Downloading from $1";
    curl -f --retry 6 --progress-bar -o $output_path $1
  else # It's a timestamp
    file_name="$1_gitlab_backup.tar"
    case "$BACKUP_BACKEND" in
      s3)
        case "$S3_TOOL" in
          s3cmd) s3cmd ${S3_CMD_BACKUP_OPTION} get "s3://$BACKUP_BUCKET_NAME/$file_name" $output_path > /dev/null ;;
          awscli) aws s3 cp "s3://$BACKUP_BUCKET_NAME/$file_name" $output_path ${AWS_S3_SETTINGS_LIST[@]} ${AWS_KMS_SETTINGS_LIST[@]} > /dev/null ;;
          *) echo "Unknown S3 tool: ${S3_TOOL}" ;;
        esac
        ;;
      gcs) gsutil cp "gs://$BACKUP_BUCKET_NAME/$file_name" $output_path > /dev/null ;;
      azure)
        azcopy copy "$(get_azure_url)/${BACKUP_BUCKET_NAME}/${file_name}?$(get_azure_token)" ${output_path} --output-level quiet ;;
      *) echo "Unknown backend: ${BACKUP_BACKEND}" ;;
    esac
  fi
  echo $output_path
}
function unpack_backup(){
  local file_path=$1
  cd $(dirname $file_path)

  echo "Unpacking backup"

  if [ ! -f $file_path ]; then
    echo $file_path not found
    exit 1
  fi

  tar -xf $file_path
}

function pack_backup(){
  echo "Packing up backup tar"
  local backup_name=$1
  tar -cf ${backup_tars_path}/${backup_name}.tar -C $backups_path .
}
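
# Sketch of the result, with a hypothetical backup id: pack_backup "$(get_backup_name)"
# wraps everything staged in $backups_path (component tarballs plus backup_information.yml)
# into ${backup_tars_path}/1693459200_2023_08_31_16.2.3_gitlab_backup.tar.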

function get_version(){
  cat $rails_dir/VERSION
}

function get_azure_url(){
  echo -n $(object-storage-azure-url)
}

function get_azure_token(){
  echo -n $(object-storage-azure-token)
}

  if [ -n "$BACKUP_TIMESTAMP" ]; then
    echo -n ${BACKUP_TIMESTAMP}
  elif [ -n "$BACKUP" ]; then
    echo -n ${BACKUP}
    local timestamp=$(date --date="@$start_timestamp" +%s_%Y_%m_%d)
    local gitlab_version=$(get_version)
    echo -n ${timestamp}_${gitlab_version}
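
# Illustrative outputs: with neither variable set, the id combines start time and version,
# e.g. "1693459200_2023_08_31_16.2.3"; with BACKUP_TIMESTAMP=mybackup set, it is "mybackup".
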
function get_backup_name(){
  echo -n "$(get_backup_id)_gitlab_backup"
}

function get_existing_backups(){
  # This will only match backups with the same naming convention as backups generated by this script
  # Example: TIMESTAMP_YYYY_MM_DD_VERSION_gitlab_backup.tar
  case $BACKUP_BACKEND in
    s3)
      case "$S3_TOOL" in
        s3cmd)
          existing_backups=($(s3cmd ${S3_CMD_BACKUP_OPTION} ls s3://$BACKUP_BUCKET_NAME --rinclude '^\d{10}_\d{4}_\d{2}_\d{2}_.+_gitlab_backup.tar$' | awk '{print $4}' | LC_ALL=C sort))
          ;;
        awscli)
          existing_backups=($(aws s3 ls ${AWS_S3_SETTINGS_LIST[@]} "s3://$BACKUP_BUCKET_NAME" | awk '{print $4}' | grep -E '^[0-9]{10}_[0-9]{4}_[0-9]{2}_[0-9]{2}_.+_gitlab_backup.tar$' | LC_ALL=C sort))
          ;;
        *)
          echo "Unknown S3 tool: ${S3_TOOL}"
          exit 1
          ;;
      esac
      ;;
    gcs)
      # Note: gsutil doesn't support regex, so we need to try to match the prefix as best we can with wildcards
      # https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames#other-wildcard-characters
      existing_backups=($(gsutil ls gs://$BACKUP_BUCKET_NAME/[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_[0-9][0-9][0-9][0-9]_[0-9][0-9]_[0-9][0-9]_\*_gitlab_backup.tar | LC_ALL=C sort))
      ;;
    azure)
      existing_backups=($(azcopy list "$(get_azure_url)/${BACKUP_BUCKET_NAME}?$(get_azure_token)" --output-type=json \
        | jq -sr '.[] | select(.MessageType=="ListObject") | .MessageContent | fromjson | select(.Path|test("^[0-9]{10}_[0-9]{4}_[0-9]{2}_[0-9]{2}_.+_gitlab_backup.tar$")) | .Path' \
        | LC_ALL=C sort))
      ;;
    *)
      echo "Unknown backend for backup: ${BACKUP_BACKEND}"
      exit 1
      ;;
  esac
}

function remove_backup(){
  local backup_to_remove=$1
  case "$BACKUP_BACKEND" in
    s3)
      case "$S3_TOOL" in
        s3cmd) s3cmd ${S3_CMD_BACKUP_OPTION} del ${backup_to_remove} > /dev/null ;;
        awscli) aws s3 rm ${AWS_S3_SETTINGS_LIST[@]} "s3://${BACKUP_BUCKET_NAME}/${backup_to_remove}" > /dev/null ;;
        *)
          echo "Unknown S3 tool: $S3_TOOL"
          exit 1
          ;;
      esac
      ;;
    gcs) gsutil rm ${backup_to_remove} > /dev/null ;;
    azure) azcopy remove "$(get_azure_url)/${BACKUP_BUCKET_NAME}/${backup_to_remove}?$(get_azure_token)" --output-level essential ;;
    *)
      echo "Unknown backend for backup: ${BACKUP_BACKEND}"
      exit 1
      ;;
  esac
}

function cleanup(){
  rm -rf $backups_path/*

  if [ -n "$MAXIMUM_BACKUPS" ]; then
    get_existing_backups

    echo "Found ${#existing_backups[@]} existing backups. Maximum allowed is $MAXIMUM_BACKUPS"
    if [ ${#existing_backups[@]} -gt $MAXIMUM_BACKUPS ]; then
      i=0
      while [ $i -lt $(expr ${#existing_backups[@]} - $MAXIMUM_BACKUPS) ]; do
        echo "Deleting old backup ${existing_backups[$i]}"
        remove_backup ${existing_backups[$i]}
        ((++i))
      done
    fi
    echo "[DONE] Finished pruning old backups"
  fi
}
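
# Pruning sketch: backup names sort lexicographically oldest-first, so with 10 existing
# backups and --maximum-backups 7 the loop above deletes the first 10 - 7 = 3 list entries.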

function write_backup_info(){
  cat << EOF > $backups_path/backup_information.yml
:backup_id: $(get_backup_id)
:db_version: $(gitlab-rails runner "File.write('/tmp/db_version', ActiveRecord::Migrator.current_version.to_s)" && cat /tmp/db_version)
:backup_created_at: $(date --date="@$start_timestamp" "+%Y-%m-%d %H:%M:%S %z")
:gitlab_version: $(get_version)
:tar_version: $(tar --version | head -n 1)
:installation_type: gitlab-helm-chart
:repositories_server_side: $(are_repositories_server_side)
:skipped: $1
EOF
}

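# Example backup_information.yml produced above (all values illustrative):
#   :backup_id: 1693459200_2023_08_31_16.2.3
#   :db_version: 20230830120000
#   :backup_created_at: 2023-08-31 06:00:00 +0000
#   :gitlab_version: 16.2.3
#   :tar_version: tar (GNU tar) 1.34
#   :installation_type: gitlab-helm-chart
#   :repositories_server_side: false
#   :skipped: lfs,pages
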
function get_skipped(){
  all=( builds.tar.gz pages.tar.gz )

  for storage in ${object_storage_backends[@]}; do
    all+=( "${storage}.tar.gz" );
  done;
  if [[ ${skipped_via_flag[@]} =~ "db" ]]; then
    skipped_string="$skipped_string,db";
  fi
  if [[ ${skipped_via_flag[@]} =~ "repositories" ]]; then
    skipped_string="$skipped_string,repositories";
  fi

  for backup_item in ${all[@]}; do
    if [ ! -e $backups_path/$backup_item ]; then
      skipped_string="$skipped_string,${backup_item%.tar.gz}";
    fi;
  done;

  echo ${skipped_string#,}
}
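
# For example, running with --skip db --skip lfs leaves lfs.tar.gz uncreated and flags db
# directly, so get_skipped echoes "db,lfs", which lands in the :skipped: field above.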

function are_repositories_server_side() {
  if [ "$REPOSITORIES_SERVER_SIDE" = true ]; then
    echo 'true'
  else
    echo 'false'
  fi
}

function backup(){
  export BACKUP=$(get_backup_id)
  backup_name=$(get_backup_name)
  rm -rf $backup_tars_path $backups_path && mkdir -p $backup_tars_path $backups_path
  if ! [[ ${skipped_via_flag[@]} =~ "db" ]]; then
    gitlab-rake gitlab:backup:db:create
  fi
  if ! [[ ${skipped_via_flag[@]} =~ "repositories" ]]; then
    gitlab-rake gitlab:backup:repo:create
  fi
  for backup_item in ${object_storage_backends[@]}; do
    if ! [[ ${skipped_via_flag[@]} =~ $backup_item ]]; then
      object-storage-backup $backup_item $backups_path/${backup_item}.tar.gz
    fi
  done
  skipped=$(get_skipped)
  write_backup_info $skipped
  pack_backup $backup_name
  case "$BACKUP_BACKEND" in
    s3)
      case "$S3_TOOL" in
        s3cmd)
          if [ -z "${STORAGE_CLASS}" ]; then
            s3cmd ${S3_CMD_BACKUP_OPTION} put \
              "${backup_tars_path}/${backup_name}.tar" "s3://$BACKUP_BUCKET_NAME" > /dev/null
          else
            s3cmd ${S3_CMD_BACKUP_OPTION} put --storage-class "${STORAGE_CLASS}" \
              "${backup_tars_path}/${backup_name}.tar" "s3://$BACKUP_BUCKET_NAME" > /dev/null
          fi
          ;;
        awscli)
          if [ -z "${STORAGE_CLASS}" ]; then
            aws s3 cp ${AWS_S3_SETTINGS_LIST[@]} ${AWS_KMS_SETTINGS_LIST[@]} \
              "${backup_tars_path}/${backup_name}.tar" "s3://$BACKUP_BUCKET_NAME" > /dev/null
          else
            aws s3 cp --storage-class "${STORAGE_CLASS}" ${AWS_S3_SETTINGS_LIST[@]} ${AWS_KMS_SETTINGS_LIST[@]} \
              "${backup_tars_path}/${backup_name}.tar" "s3://$BACKUP_BUCKET_NAME" > /dev/null
          fi
          ;;
        *) echo "Unknown S3 tool: $S3_TOOL" ;;
      esac
      echo "[DONE] Backup can be found at s3://$BACKUP_BUCKET_NAME/${backup_name}.tar"
      ;;
    gcs)
      if [ -z "${STORAGE_CLASS}" ]; then
        gsutil cp -n ${backup_tars_path}/${backup_name}.tar gs://$BACKUP_BUCKET_NAME > /dev/null
      else
        gsutil cp -s "${STORAGE_CLASS}" -n ${backup_tars_path}/${backup_name}.tar gs://$BACKUP_BUCKET_NAME > /dev/null
      fi
      echo "[DONE] Backup can be found at gs://$BACKUP_BUCKET_NAME/${backup_name}.tar"
      ;;
    azure)
      if [ -z "${STORAGE_CLASS}" ]; then
        azcopy copy "${backup_tars_path}/${backup_name}.tar" --output-level essential \
           "$(get_azure_url)/${BACKUP_BUCKET_NAME}/${backup_name}.tar?$(get_azure_token)"
      else
        azcopy copy --block-blob-tier "${STORAGE_CLASS}" --output-level essential \
          "${backup_tars_path}/${backup_name}.tar" "$(get_azure_url)/${BACKUP_BUCKET_NAME}/${backup_name}.tar?$(get_azure_token)"
      fi
      echo "[DONE] Backup can be found at ${AZURE_BASE_URL}/${BACKUP_BUCKET_NAME}/${backup_name}.tar"
      ;;
    *) echo "Unknown backend for backup: ${BACKUP_BACKEND}" ;;
  esac
  if ! [ "$SKIP_CLEANUP" = true ]; then
    cleanup
  fi
function is_skipped() {
  [[ $SKIPPED =~ (^|[^[:alnum:]])$1([^[:alnum:]]|$) ]]
}
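
# The regex matches whole tokens in the comma-separated $SKIPPED list: with
# SKIPPED="db,repositories", is_skipped "db" succeeds while is_skipped "d" does not.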
  if [ -z "$BACKUP_URL" ] && [ -z "$BACKUP_TIMESTAMP" ]; then
    echo "You need to set BACKUP_URL or BACKUP_TIMESTAMP variable"
  local backup_remote=${BACKUP_URL:-$BACKUP_TIMESTAMP}
  local backup_basename=$(basename "$backup_remote")
  local backup_id=${backup_basename%_gitlab_backup.tar}
  file=$(fetch_remote_backup $backup_remote)
  unpack_backup $file
  skipped_line=$(grep skipped $(dirname $file)/backup_information.yml)
  export SKIPPED=$(echo ${skipped_line#:skipped:},$(IFS=,; echo -n "${skipped_via_flag[*]}"))
  installation_type_line=$(grep installation_type $(dirname $file)/backup_information.yml || echo ":installation_type: unknown")
  export INSTALLATION_TYPE=$(echo ${installation_type_line#:installation_type: })

  ! is_skipped "db"           && gitlab-rake gitlab:backup:db:restore
  ! is_skipped "repositories" && gitlab-rake gitlab:backup:repo:restore
  ! is_skipped "builds"       && gitlab-rake gitlab:backup:builds:restore
  if [ "$INSTALLATION_TYPE" = "gitlab-helm-chart" ]; then
    for restore_item in ${object_storage_backends[@]}; do
      if [ -f $backups_path/${restore_item}.tar.gz ]; then
        ! is_skipped $restore_item && object-storage-restore $restore_item $backups_path/${restore_item}.tar.gz
      fi
    done
  else
    echo "Backup tarball not from a Helm chart based installation. Not processing files in object storage."
  fi
  gitlab-rake cache:clear
  cleanup
}

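# A typical restore, sketched with a hypothetical timestamp: the tar is fetched and
# unpacked, then every component not listed in :skipped: is restored:
#   backup-utility --restore -t 1693459200_2023_08_31_16.2.3 --skip-restore-prompt
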
# Parse command line arguments
while [[ $# -gt 0 ]]
do
  key="$1"
  case $key in
    -h|--help)
      usage
      exit 0
      ;;
    -f|--file)
      BACKUP_URL="$2"
      shift
      shift
      ;;
    -t|--timestamp)
      BACKUP_TIMESTAMP="$2"
      shift
      shift
      ;;
      export BACKUP_BACKEND="$2"
    --s3tool)
      export S3_TOOL="$2"
      shift
      shift
      ;;
    --s3config)
      if [ ! -f $2 ]; then
        echo "s3cmd file specified does not exist";
        exit 1;
      fi
      export S3_CMD_BACKUP_OPTION="--config=$2 "
      shift
      shift
      ;;
    --restore)
      if [ -n "$ACTION" ]; then
        echo "Only one action at a time is supported"
        exit 1
      fi
      ACTION="restore"
      shift
      ;;
    --rsyncable)
      export GZIP_RSYNCABLE="yes"
      shift
      ;;
    --skip)
      skipped_via_flag+=( "$2" )
      shift
      shift
      ;;
    --storage-class)
      export STORAGE_CLASS="$2"
      shift
      shift
      ;;
    --maximum-backups)
      export MAXIMUM_BACKUPS="$2"
      if ! [[ $MAXIMUM_BACKUPS =~ ^-?[0-9]+$ ]]; then
        echo "Value specified for --maximum-backups must be an integer. Got: ${MAXIMUM_BACKUPS}"
        exit 1
      fi
      shift
      shift
      ;;
    --cleanup)
      if [ -n "$ACTION" ]; then
        echo "Only one action at a time is supported"
        exit 1
      fi
      ACTION="cleanup"
      shift
      ;;
    --aws-kms-key-id)
      AWS_KMS_SETTINGS_LIST+=(--sse aws:kms --sse-kms-key-id $2)
      shift
      shift
      ;;
    --aws-s3-endpoint-url)
      AWS_S3_SETTINGS_LIST+=(--endpoint-url $2)
      shift
      shift
      ;;
    --aws-region)
      export AWS_REGION="$2"
      shift
      shift
      ;;
    --azure-config-file)
      export AZURE_CONFIG_FILE=$2
      shift
      shift
      ;;
    --repositories-server-side)
      export REPOSITORIES_SERVER_SIDE="true"
      shift
      ;;
    --skip-restore-prompt)
      export GITLAB_ASSUME_YES=1
      shift
      ;;
    --skip-cleanup)
      export SKIP_CLEANUP="true"
      shift
      ;;
    *)
      usage
      echo "Unexpected parameter: $key"
      exit 1
      ;;
  esac
done

# AWS_REGION can be set as an environment variable and can be overridden by the --aws-region argument
if [ -n "$AWS_REGION" ]; then
  AWS_S3_SETTINGS_LIST+=(--region ${AWS_REGION})
fi

# These variables will be used by the ruby scripts
AWS_KMS_SETTINGS="${AWS_KMS_SETTINGS_LIST[*]}"
AWS_S3_SETTINGS="${AWS_S3_SETTINGS_LIST[*]}"
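
# e.g. after "--aws-s3-endpoint-url https://minio.example.com --aws-region us-east-1"
# (values illustrative), AWS_S3_SETTINGS is "--endpoint-url https://minio.example.com --region us-east-1".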

if [ "$ACTION" = "restore" ]; then
elif [ "$ACTION" = "cleanup" ]; then
  cleanup
elif [ -z "$ACTION" ]; then
  ACTION="backup"
  backup