Skip to content
代码片段 群组 项目
未验证 提交 69975ef2 编辑于 作者: Michał Zając's avatar Michał Zając 提交者: GitLab
浏览文件

Merge branch...

Merge branch '480364-resolve-cross-join-issues-in-ee-lib-ee-gitlab-background_migration' into 'master' 

Resolve cross join issues in ee/lib/ee/gitlab/background_migration/backfill_finding_initial_pipeline_id.rb

See merge request https://gitlab.com/gitlab-org/gitlab/-/merge_requests/167789



Merged-by: default avatarMichał Zając <mzajac@gitlab.com>
Approved-by: default avatarMichał Zając <mzajac@gitlab.com>
Co-authored-by: default avatarGregory <11164960-ghavenga@users.noreply.gitlab.com>
No related branches found
No related tags found
无相关合并请求
显示 0 个添加345 个删除
---
migration_job_name: BackfillFindingInitialPipelineId
description: >-
We need to ensure that all existing vulnerability findings have
their initial_finding_pipeline_id column backfilled before dropping
the vulnerability_occurrence_pipelines table.
We do not expect any circumstance under which a
vulnerability_occurrence cannot have these fields fields filled in.
feature_category: vulnerability_management
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/147220
milestone: '17.1'
queued_migration_version: 20240514030609
finalized_by: 20240709060152
# frozen_string_literal: true
class QueueBackfillFindingInitialPipelineId < Gitlab::Database::Migration[2.2]
milestone '17.1'
MIGRATION = "BackfillFindingInitialPipelineId"
DELAY_INTERVAL = 2.minutes
BATCH_SIZE = 1000
SUB_BATCH_SIZE = 250
disable_ddl_transaction!
restrict_gitlab_migration gitlab_schema: :gitlab_main
def up
Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.with_suppressed do
queue_batched_background_migration(
MIGRATION,
:vulnerability_occurrences,
:id,
job_interval: DELAY_INTERVAL,
batch_size: BATCH_SIZE,
sub_batch_size: SUB_BATCH_SIZE
)
end
end
def down
Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.with_suppressed do
delete_batched_background_migration(MIGRATION, :vulnerability_occurrences, :id, [])
end
end
end
# frozen_string_literal: true
class FinalizeBackfillFindingInitialPipelineId < Gitlab::Database::Migration[2.2]
MIGRATION = 'BackfillFindingInitialPipelineId'
disable_ddl_transaction!
restrict_gitlab_migration gitlab_schema: :gitlab_main
milestone '17.3'
def up
ensure_batched_background_migration_is_finished(
job_class_name: MIGRATION,
table_name: :vulnerability_occurrences,
column_name: :id,
job_arguments: [],
finalize: true
)
end
def down
# no-op
end
end
...@@ -25,7 +25,6 @@ class UpdateFinishStatusForBbmMigrations < Gitlab::Database::Migration[2.2] ...@@ -25,7 +25,6 @@ class UpdateFinishStatusForBbmMigrations < Gitlab::Database::Migration[2.2]
BackfillEpicDatesToWorkItemDatesSources BackfillEpicDatesToWorkItemDatesSources
BackfillEpicIssuesIntoWorkItemParentLinks BackfillEpicIssuesIntoWorkItemParentLinks
BackfillFindingIdInVulnerabilities BackfillFindingIdInVulnerabilities
BackfillFindingInitialPipelineId
BackfillHasIssuesForExternalIssueLinks BackfillHasIssuesForExternalIssueLinks
BackfillHasRemediationsOfVulnerabilityReads BackfillHasRemediationsOfVulnerabilityReads
BackfillIssueSearchDataNamespaceId BackfillIssueSearchDataNamespaceId
......
e4e421745bb30426b1cf80e2c28518f7e01b64270c459ffc9c271bd8183e8eb5
\ No newline at end of file
d910f07c89002c56103abdd7042d3b482a40710cde3a19c8fd8a2ec20298ff93
\ No newline at end of file
# frozen_string_literal: true
module EE
module Gitlab
module BackgroundMigration
module BackfillFindingInitialPipelineId
extend ActiveSupport::Concern
extend ::Gitlab::Utils::Override
prepended do
scope_to ->(relation) do
relation.where(initial_pipeline_id: nil).or(
relation.where(latest_pipeline_id: nil)
)
end
operation_name :backfill_finding_initial_and_latest_pipeline_id
feature_category :vulnerability_management
end
override :perform
def perform
::Gitlab::Database.allow_cross_joins_across_databases(
url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/480364'
) do
each_sub_batch do |sub_batch|
connection.exec_update(update_initial_sql(sub_batch))
connection.exec_update(update_latest_sql(sub_batch))
end
end
end
private
def update_initial_sql(sub_batch)
<<~SQL
WITH cte AS (
SELECT
occurrence_pipelines.*
FROM
vulnerability_occurrences,
LATERAL (
SELECT
occurrence_id,
pipeline_id
FROM
vulnerability_occurrence_pipelines
WHERE
vulnerability_occurrence_pipelines.occurrence_id = vulnerability_occurrences.id
ORDER BY
pipeline_id ASC
LIMIT 1) AS occurrence_pipelines
WHERE
vulnerability_occurrences.id IN (#{sub_batch.select(:id).to_sql}))
UPDATE
vulnerability_occurrences f
SET
initial_pipeline_id = c.pipeline_id
FROM
cte c
WHERE
f.id = c.occurrence_id
SQL
end
def update_latest_sql(sub_batch)
<<~SQL
WITH cte AS (
SELECT
occurrence_pipelines.*
FROM
vulnerability_occurrences,
LATERAL (
SELECT
occurrence_id,
pipeline_id
FROM
vulnerability_occurrence_pipelines
WHERE
vulnerability_occurrence_pipelines.occurrence_id = vulnerability_occurrences.id
ORDER BY
pipeline_id DESC
LIMIT 1) AS occurrence_pipelines
WHERE
vulnerability_occurrences.id IN (#{sub_batch.select(:id).to_sql}))
UPDATE
vulnerability_occurrences f
SET
latest_pipeline_id = c.pipeline_id
FROM
cte c
WHERE
f.id = c.occurrence_id
SQL
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BackfillFindingInitialPipelineId, feature_category: :vulnerability_management, schema: 20240514030609 do
subject(:perform_migration) { described_class.new(**args).perform }
let(:args) do
{
start_id: vulnerability_findings.first.id,
end_id: vulnerability_findings.last.id,
batch_table: :vulnerability_occurrences,
batch_column: :id,
sub_batch_size: total_findings_count,
pause_ms: 0,
connection: ApplicationRecord.connection
}
end
let(:projects) { table(:projects) }
let(:namespaces) { table(:namespaces) }
let(:scanners) { table(:vulnerability_scanners) }
let(:vulnerability_identifiers) { table(:vulnerability_identifiers) }
let(:vulnerability_findings) { table(:vulnerability_occurrences) }
let(:vulnerability_finding_pipelines) { table(:vulnerability_occurrence_pipelines) }
let(:group_namespace) { namespaces.create!(name: 'gitlab-org', path: 'gitlab-org', type: 'Group') }
let(:project) { create_project('gitlab', group_namespace) }
let!(:finding1) do
create_finding.tap do |finding|
create_finding_pipeline!(finding_id: finding.id, pipeline_id: 1) # initial
create_finding_pipeline!(finding_id: finding.id, pipeline_id: 2)
create_finding_pipeline!(finding_id: finding.id, pipeline_id: 3) # latest
end
end
let!(:finding2) do
create_finding.tap do |finding|
create_finding_pipeline!(finding_id: finding.id, pipeline_id: 4) # initial and latest
end
end
# no pipeline as a sanity check to make sure nothing blows up
let!(:finding3) { create_finding }
let!(:finding4) do
create_finding(initial_pipeline_id: 5, latest_pipeline_id: 5).tap do |finding|
# initial_pipeline_id is already set to 5, so it should
# not update to 555
create_finding_pipeline!(finding_id: finding.id, pipeline_id: 555)
end
end
let(:total_findings_count) { vulnerability_findings.all.count }
it 'backfills initial_pipeline_id and latest_pipeline_id', :aggregate_failures do
# .to(1) because finding3 has no pipeline
expect { perform_migration }.to change {
vulnerability_findings.where(initial_pipeline_id: nil).count
}.from(3).to(1).and change { vulnerability_findings.where(initial_pipeline_id: nil).count }.from(3).to(1)
expect(finding1.reload.initial_pipeline_id).to eq(1)
expect(finding2.reload.initial_pipeline_id).to eq(4)
expect(finding3.reload.initial_pipeline_id).to be_nil
expect(finding4.reload.initial_pipeline_id).to eq(5)
expect(finding1.latest_pipeline_id).to eq(3)
expect(finding2.latest_pipeline_id).to eq(4)
expect(finding3.latest_pipeline_id).to be_nil
expect(finding4.latest_pipeline_id).to eq(5)
end
it 'does not have N+1 queries' do
def run_migration(end_id: vulnerability_findings.last.id)
described_class.new(**args.merge(end_id: end_id)).perform
end
control = ::ActiveRecord::QueryRecorder.new { run_migration }
# reset backfilled rows to nil
vulnerability_findings.update_all(initial_pipeline_id: nil, latest_pipeline_id: nil)
create_finding.tap do |finding|
create_finding_pipeline!(finding_id: finding.id, pipeline_id: 91)
end
expect { run_migration }.not_to exceed_query_limit(control)
end
context 'when a sub-batch only contains findings without an associated pipeline' do
# finding3 has no pipeline associated with it
let(:args) { super().merge(start_id: finding3.id, end_id: finding3.id) }
it 'does not raise an error' do
expect { perform_migration }.not_to raise_error
end
end
def create_finding(initial_pipeline_id: nil, latest_pipeline_id: nil)
scanner = scanners.find_or_create_by!(name: 'bar') do |scanner|
scanner.project_id = project.id
scanner.external_id = 'foo'
end
identifier = vulnerability_identifiers.create!(
project_id: project.id,
external_id: "CVE-2018-1234",
external_type: "CVE",
name: "CVE-2018-1234",
fingerprint: SecureRandom.hex(20)
)
vulnerability_findings.create!(
project_id: project.id,
scanner_id: scanner.id,
severity: 5, # medium
confidence: 2, # unknown,
report_type: 99, # generic
primary_identifier_id: identifier.id,
project_fingerprint: SecureRandom.hex(20),
location_fingerprint: SecureRandom.hex(20),
uuid: SecureRandom.uuid,
name: "CVE-2018-1234",
raw_metadata: "{}",
metadata_version: "test:1.0",
initial_pipeline_id: initial_pipeline_id,
latest_pipeline_id: latest_pipeline_id
)
end
def create_finding_pipeline!(finding_id:, pipeline_id:)
vulnerability_finding_pipelines.create!(pipeline_id: pipeline_id, occurrence_id: finding_id)
end
def create_project(name, group)
project_namespace = namespaces.create!(
name: name,
path: name,
type: 'Project'
)
projects.create!(
namespace_id: group.id,
project_namespace_id: project_namespace.id,
name: name,
path: name,
archived: false
)
end
end
# frozen_string_literal: true
require 'spec_helper'
require_migration!
RSpec.describe QueueBackfillFindingInitialPipelineId, feature_category: :vulnerability_management do
let!(:batched_migration) { described_class::MIGRATION }
it 'schedules a new batched migration' do
reversible_migration do |migration|
migration.before -> {
expect(batched_migration).not_to have_scheduled_batched_migration
}
migration.after -> {
expect(batched_migration).to have_scheduled_batched_migration(
table_name: :vulnerability_occurrences,
column_name: :id,
interval: described_class::DELAY_INTERVAL,
batch_size: described_class::BATCH_SIZE,
sub_batch_size: described_class::SUB_BATCH_SIZE
)
}
end
end
end
0% 加载中 .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册