From e4b10bea3677c783254a89a86ef75633759d676a Mon Sep 17 00:00:00 2001 From: James Liu <jliu@gitlab.com> Date: Mon, 3 Mar 2025 07:14:21 +0000 Subject: [PATCH] api: Add a repository health API endpoint Introduce a new repository-scoped API endpoint that returns repository health data from Gitaly, together with the corresponding Grape entity, rate limit, feature flag, request specs, and Markdown documentation. --- app/models/repository.rb | 12 +++ .../project_repositories_health.yml | 9 ++ doc/api/repositories.md | 48 ++++++++++ lib/api/entities/repository_health.rb | 37 +++++++ lib/api/repositories.rb | 29 ++++++ lib/gitlab/application_rate_limiter.rb | 1 + lib/gitlab/git/repository.rb | 3 +- spec/requests/api/repositories_spec.rb | 96 +++++++++++++++++++ 8 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 config/feature_flags/gitlab_com_derisk/project_repositories_health.yml create mode 100644 lib/api/entities/repository_health.rb diff --git a/app/models/repository.rb b/app/models/repository.rb index 4663902be1338..54d5961adb95d 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -791,6 +791,18 @@ def contributors(ref: nil, order_by: nil, sort: 'asc') Commit.order_by(collection: commits, order_by: order_by, sort: sort) end + def health(generate) + cache.fetch(:health) do + if generate + info = raw_repository.repository_info + + info_h = info.to_h + info_h[:updated_at] = Time.current + info_h + end + end + end + def branch_names_contains(sha, limit: 0, exclude_refs: []) refs = raw_repository.branch_names_contains_sha(sha, limit: adjust_containing_limit(limit: limit, exclude_refs: exclude_refs)) diff --git a/config/feature_flags/gitlab_com_derisk/project_repositories_health.yml b/config/feature_flags/gitlab_com_derisk/project_repositories_health.yml new file mode 100644 index 0000000000000..0ac5c19a83632 --- /dev/null +++ b/config/feature_flags/gitlab_com_derisk/project_repositories_health.yml @@ -0,0 +1,9 @@ +--- +name: project_repositories_health
+feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/509253 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/182220 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/521115 +milestone: '17.10' +group: group::gitaly +type: gitlab_com_derisk +default_enabled: false diff --git a/doc/api/repositories.md b/doc/api/repositories.md index e9b85b0fbfe71..5c38e639e8789 100644 --- a/doc/api/repositories.md +++ b/doc/api/repositories.md @@ -510,6 +510,54 @@ Example response, with line breaks added for readability: } ``` +## Health + +{{< history >}} + +- [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/182220) in GitLab 17.10. Guarded behind the + [project_repositories_health](https://gitlab.com/gitlab-org/gitlab/-/issues/521115) feature flag. + +{{< /history >}} + +Get statistics related to the health of a project repository. Requests that generate a new health report (`generate=true`) are rate-limited to 5 requests/hour per project. + +```plaintext +GET /projects/:id/repository/health +``` + +Supported attributes: + +| Attribute | Type | Required | Description | +|:-----------|:--------|:---------|:---------------------------------------------------------------------------------------| +| `generate` | boolean | no | Whether a new health report should be generated. Set this if the endpoint returns 404. 
| + +Example request: + +```shell +curl --header "PRIVATE-TOKEN: token" \ + --url "https://gitlab.com/api/v4/projects/42/repository/health" +``` + +Example response: + +```json +{ + "size": 42002816, + "references": { + "loose_count": 3, + "packed_size": 315703, + "reference_backend": "REFERENCE_BACKEND_FILES" + }, + "objects": { + "size": 39651458, + "recent_size": 39461265, + "stale_size": 190193, + "keep_size": 0 + }, + "updated_at": "2025-02-26T03:42:13.015Z" +} +``` + ## Related topics - User documentation for [changelogs](../user/project/changelogs.md) diff --git a/lib/api/entities/repository_health.rb b/lib/api/entities/repository_health.rb new file mode 100644 index 0000000000000..c3c738a510c71 --- /dev/null +++ b/lib/api/entities/repository_health.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module API + module Entities + # rubocop:disable Layout/LineLength -- `desc` is too long + class RepositoryHealth < Grape::Entity + class References < Grape::Entity + expose :loose_count, + documentation: { type: 'integer', desc: 'Number of loose references in the repository.' } + expose :packed_size, + documentation: { type: 'integer', desc: 'Size in bytes of packed references in the repository.' } + expose :reference_backend, + documentation: { type: 'string', + desc: "Type of backend used to store references. Either 'REFERENCE_BACKEND_REFTABLE' or 'REFERENCE_BACKEND_FILES'." } + end + + class Objects < Grape::Entity + expose :size, + documentation: { type: 'integer', desc: 'Size in bytes of all objects in the repository.' } + expose :recent_size, + documentation: { type: 'integer', + desc: 'Size in bytes of all recent objects in the repository. Recent objects are those which are reachable.' } + expose :stale_size, + documentation: { type: 'integer', + desc: 'Size in bytes of all stale objects in the repository. Stale objects are those which are unreachable and may be deleted during housekeeping.' 
} + expose :keep_size, + documentation: { type: 'integer', desc: 'Size in bytes of all packfiles with the .keep extension.' } + end + + expose :size, documentation: { type: 'integer', desc: 'Repository size in bytes.' } + expose :references, using: References + expose :objects, using: Objects + expose :updated_at, documentation: { type: 'dateTime', example: '2025-02-24T09:05:50.355Z' } + end + # rubocop:enable Layout/LineLength + end +end diff --git a/lib/api/repositories.rb b/lib/api/repositories.rb index 60f86704ba5c2..e06ddc4f1a738 100644 --- a/lib/api/repositories.rb +++ b/lib/api/repositories.rb @@ -231,6 +231,35 @@ def compare_cache_key(current_user, user_project, target_project, params) end end + desc 'Get repository health' do + success Entities::RepositoryHealth + end + params do + optional :generate, type: Boolean, default: false, desc: 'Triggers a new health report to be generated' + end + get ':id/repository/health', urgency: :low do + unless Feature.enabled?(:project_repositories_health, user_project) + not_found! + end + + authorize! :admin_project, user_project + + generate = params[:generate] || false + if generate + check_rate_limit!(:project_repositories_health, scope: [user_project]) do + render_api_error!({ error: 'Repository health has been requested too many times. Try again later.' }, 429) + end + end + + health = user_project.repository.health(generate) + + if health.nil? + not_found! 
+ end + + present health, with: Entities::RepositoryHealth + end + desc 'Get repository contributors' do success Entities::Contributor end diff --git a/lib/gitlab/application_rate_limiter.rb b/lib/gitlab/application_rate_limiter.rb index 2c955e5f1cb7c..24521414d8e86 100644 --- a/lib/gitlab/application_rate_limiter.rb +++ b/lib/gitlab/application_rate_limiter.rb @@ -25,6 +25,7 @@ def rate_limits # rubocop:disable Metrics/AbcSize project_download_export: { threshold: -> { application_settings.project_download_export_limit }, interval: 1.minute }, project_repositories_archive: { threshold: 5, interval: 1.minute }, project_repositories_changelog: { threshold: 5, interval: 1.minute }, + project_repositories_health: { threshold: 5, interval: 1.hour }, project_generate_new_export: { threshold: -> { application_settings.project_export_limit }, interval: 1.minute }, project_import: { threshold: -> { application_settings.project_import_limit }, interval: 1.minute }, play_pipeline_schedule: { threshold: 1, interval: 1.minute }, diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index bceb68f4fd102..8784f7fbb77c1 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -48,6 +48,7 @@ def initialize(error_code) attr_reader :storage, :gl_repository, :gl_project_path, :container delegate :list_oversized_blobs, :list_all_blobs, :list_blobs, to: :gitaly_blob_client + delegate :repository_info, to: :gitaly_repository_client # This remote name has to be stable for all types of repositories that # can join an object pool. 
If it's structure ever changes, a migration @@ -1329,7 +1330,7 @@ def check_blobs_generated(base, head, changed_paths) end def repository_info_size_megabytes - bytes = gitaly_repository_client.repository_info.size + bytes = repository_info.size Gitlab::Utils.bytes_to_megabytes(bytes).round(2) end diff --git a/spec/requests/api/repositories_spec.rb b/spec/requests/api/repositories_spec.rb index 247dd938cd6ee..0477ef8913f03 100644 --- a/spec/requests/api/repositories_spec.rb +++ b/spec/requests/api/repositories_spec.rb @@ -10,6 +10,7 @@ let(:user) { create(:user) } let(:guest) { create(:user).tap { |u| create(:project_member, :guest, user: u, project: project) } } + let(:developer) { create(:user).tap { |u| create(:project_member, :developer, user: u, project: project) } } let!(:project) { create(:project, :repository, creator: user) } let!(:maintainer) { create(:project_member, :maintainer, user: user, project: project) } @@ -781,6 +782,101 @@ def commit_messages(response) end end + describe 'GET :id/repository/health' do + before do + stub_feature_flags(project_repositories_health: true) + end + + let(:params) { nil } + + subject(:request) do + get(api("/projects/#{project.id}/repository/health", current_user), params: params) + end + + shared_examples 'health' do + it 'returns 404 on first invocation' do + request + + expect(response).to have_gitlab_http_status(:not_found) + end + + it 'returns 404 on subsequent invocations if a report has not been generated' do + 2.times do + request + expect(response).to have_gitlab_http_status(:not_found) + end + end + + describe 'when a new report is generated' do + let(:params) { { generate: true } } + + it 'returns the health report' do + t_start = Time.current + request + t_end = Time.current + + expect(response).to have_gitlab_http_status(:success) + expect(json_response['size']).to be_present + expect(json_response['objects']).to be_present + expect(json_response['references']).to be_present + 
expect(Time.parse(json_response['updated_at'])).to be_between(t_start, t_end) + end + + context 'when rate limited' do + it 'returns api error' do + allow(Gitlab::ApplicationRateLimiter).to receive(:throttled_request?).and_return(true) + + request + + expect(response).to have_gitlab_http_status(:too_many_requests) + end + end + end + end + + context 'when unauthenticated', 'and project is public' do + it_behaves_like '403 response' do + let(:project) { create(:project, :public, :repository) } + let(:current_user) { nil } + end + end + + context 'when unauthenticated', 'and project is private' do + it_behaves_like '404 response' do + let(:current_user) { nil } + let(:message) { '404 Project Not Found' } + end + end + + context 'when authenticated', 'as a maintainer' do + it_behaves_like 'health' do + let(:current_user) { user } + end + end + + context 'when authenticated', 'as a developer' do + it_behaves_like '403 response' do + let(:current_user) { developer } + end + end + + context 'when authenticated', 'as a guest' do + it_behaves_like '403 response' do + let(:current_user) { guest } + end + end + + context 'when feature flag is disabled' do + before do + stub_feature_flags(project_repositories_health: false) + end + + it_behaves_like '404 response' do + let(:current_user) { user } + end + end + end + describe 'GET :id/repository/merge_base' do let(:refs) do %w[304d257dcb821665ab5110318fc58a007bd104ed 0031876facac3f2b2702a0e53a26e89939a42209 570e7b2abdd848b95f2f578043fc23bd6f6fd24d] -- GitLab