diff --git a/lib/tasks/gitlab/keep_around.rake b/lib/tasks/gitlab/keep_around.rake index 7795df3c5f0d6f17fc7b416cfb5282c64bf306fb..b97c7901a07077d55b7ea778b2f267ef451268b5 100644 --- a/lib/tasks/gitlab/keep_around.rake +++ b/lib/tasks/gitlab/keep_around.rake @@ -16,57 +16,44 @@ namespace :gitlab do create_csv do |csv| logger.info "Finding keep-around references..." - refs = project.repository.raw.list_refs( + project.repository.raw.list_refs( ["refs/#{::Repository::REF_KEEP_AROUND}/"], dynamic_timeout: ::Gitlab::GitalyClient.long_timeout - ).each_with_object({}) do |ref, memo| - memo[ref.target] = { - keep_around: ref.name, - count: 0 - } + ).each do |ref| + csv << ['keep', ref.target] end - logger.info "Found #{refs.count} keep-around references" + add_pipeline_shas(project, csv) + add_merge_request_shas(project, csv) + add_merge_request_diff_shas(project, csv) + add_note_shas(project, csv) + add_sent_notification_shas(project, csv) + add_todo_shas(project, csv) - add_pipeline_shas(project, refs) - add_merge_request_shas(project, refs) - add_merge_request_diff_shas(project, refs) - add_note_shas(project, refs) - add_sent_notification_shas(project, refs) - add_todo_shas(project, refs) - - logger.info "Summary:" - logger.info "\tKeep-around references: #{refs.count}" - logger.info "\tPotentially orphaned: #{refs.values.count { |ref| ref[:count] < 1 }}" - - logger.info "Writing CSV..." - refs.each_value do |ref| - csv << [ref[:keep_around], ref[:count]] - end logger.info "Keep-around orphan report complete" end end - def add_pipeline_shas(project, refs) + def add_pipeline_shas(project, csv) logger.info "Checking pipeline shas..." project.all_pipelines.select(:id, :sha, :before_sha).find_each do |pipeline| - add_match(refs, pipeline.sha) + add_match(csv, pipeline.sha) # before_sha has a project fallback to produce a blank sha. For this # purpose we would prefer not to load project so we are loading the # attribute directly. - add_match(refs, pipeline.read_attribute(:before_sha)) + add_match(csv, pipeline.read_attribute(:before_sha)) end end - def add_merge_request_shas(project, refs) + def add_merge_request_shas(project, csv) logger.info "Checking merge request shas..." merge_requests = MergeRequest.from_and_to_forks(project).select(:id, :merge_commit_sha) merge_requests.find_each do |merge_request| - add_match(refs, merge_request.merge_commit_sha) + add_match(csv, merge_request.merge_commit_sha) end end - def add_merge_request_diff_shas(project, refs) + def add_merge_request_diff_shas(project, csv) logger.info "Checking merge request diff shas..." merge_requests = MergeRequest.from_and_to_forks(project) merge_request_diffs = MergeRequestDiff @@ -74,44 +61,44 @@ namespace :gitlab do .select(:id, :start_commit_sha, :head_commit_sha, :base_commit_sha) merge_request_diffs.find_each do |diff| - add_match(refs, diff.start_commit_sha) - add_match(refs, diff.head_commit_sha) - add_match(refs, diff.base_commit_sha) + add_match(csv, diff.start_commit_sha) + add_match(csv, diff.head_commit_sha) + add_match(csv, diff.base_commit_sha) end end - def add_note_shas(project, refs) + def add_note_shas(project, csv) logger.info "Checking note shas..." logger.warn "System notes will not be included." Note.where(project: project).where('NOT system').each_batch(of: 1000) do |b| b.where.not(commit_id: nil).select(:commit_id).each do |note| - add_match(refs, note.commit_id) + add_match(csv, note.commit_id) end b.where(type: DiffNote).select(:type, :position, :original_position).each do |note| note.shas.each do |sha| - add_match(refs, sha) + add_match(csv, sha) end end end end - def add_sent_notification_shas(_project, _refs) + def add_sent_notification_shas(_project, _csv) logger.warn "Sent notifications will not be included." end - def add_todo_shas(project, refs) + def add_todo_shas(project, csv) logger.info "Checking todo shas..." Todo.where(project: project).each_batch(of: 1000) do |b| b.where.not(commit_id: nil).select(:commit_id).each do |todo| - add_match(refs, todo.commit_id) + add_match(csv, todo.commit_id) end end end - def add_match(refs, sha) - return unless refs[sha] + def add_match(csv, sha) + return if !sha.present? || Gitlab::Git.blank_ref?(sha) - refs[sha][:count] += 1 + csv << ['usage', sha] end def create_csv @@ -123,7 +110,7 @@ namespace :gitlab do end File.open(filename, "w") do |file| - yield CSV.new(file, headers: %w[keep_around count], write_headers: true) + yield CSV.new(file, headers: %w[operation commit_id], write_headers: true) end end diff --git a/spec/tasks/gitlab/keep_around_rake_spec.rb b/spec/tasks/gitlab/keep_around_rake_spec.rb index 49714833fe63ea9f26bb5f39b9b685245fdc3a7a..2e6e1ff7a85c0dc6eb94d008e446df8d103a12f0 100644 --- a/spec/tasks/gitlab/keep_around_rake_spec.rb +++ b/spec/tasks/gitlab/keep_around_rake_spec.rb @@ -53,12 +53,23 @@ end shared_examples 'orphans found' do |keep_around_count:, orphan_count:| - it 'prints a summary' do - expect(logger).to receive(:info).with("Summary:") - expect(logger).to receive(:info).with("\tKeep-around references: #{keep_around_count}") - expect(logger).to receive(:info).with("\tPotentially orphaned: #{orphan_count}") - + it 'creates a report' do run_rake_task('gitlab:keep_around:orphaned') + + csv = CSV.parse(file, headers: true) + keep_counts = {} + + csv.each do |row| + case row['operation'] + when 'keep' + keep_counts[row['commit_id']] = 0 + when 'usage' + keep_counts[row['commit_id']] += 1 if keep_counts.has_key?(row['commit_id']) + end + end + + expect(keep_counts.size).to eq(keep_around_count) + expect(keep_counts.values.count { |keep_count| keep_count == 0 }).to eq(orphan_count) end end