Skip to content
代码片段 群组 项目
未验证 提交 946eb248 编辑于 作者: Simon Tomlinson's avatar Simon Tomlinson 提交者: GitLab
浏览文件

Merge branch 'ghavenga-increase-scan-purge-iteration-size' into 'master'

No related branches found
No related tags found
2 合并请求!3031Merge per-main-jh to main-jh by luzhiyuan,!3030Merge per-main-jh to main-jh
......@@ -2,32 +2,54 @@
module Security
class PurgeScansService
MAX_STALE_SCANS_SIZE = 50_000
include Gitlab::Utils::StrongMemoize
MAX_STALE_SCANS_SIZE = 200_000
SCAN_BATCH_SIZE = 100
# To optimise purging against rereading dead tuples on progressive purge executions
# we cache the last purged tuple so that the next job can start where the prior finished.
# The TTL for this is in hours so that we'll start from the beginning the following weekend.
LAST_PURGED_SCAN_TUPLE = 'Security::PurgeScansService::LAST_PURGED_SCAN_TUPLE'
LAST_PURGED_SCAN_TUPLE_TTL = 24.hours.to_i
class << self
def purge_stale_records
Security::Scan.stale.ordered_by_created_at_and_id.then { |relation| execute(relation) }
execute(Security::Scan.stale.ordered_by_created_at_and_id, last_purged_tuple)
end
def purge_by_build_ids(build_ids)
Security::Scan.by_build_ids(build_ids).then { |relation| execute(relation) }
end
def execute(security_scans)
new(security_scans).execute
def execute(security_scans, cursor = {})
new(security_scans, cursor).execute
end
private
# returns {} if no last tuple was set
# we exclude `ex` because it's the expiry timer for the tuple used by redis
def last_purged_tuple
Gitlab::Redis::SharedState.with do |redis|
redis.hgetall(LAST_PURGED_SCAN_TUPLE)
end.except('ex')
end
end
def initialize(security_scans)
@iterator = Gitlab::Pagination::Keyset::Iterator.new(scope: security_scans)
def initialize(security_scans, cursor = {})
@iterator = Gitlab::Pagination::Keyset::Iterator.new(scope: security_scans, cursor: cursor)
@updated_count = 0
end
def execute
iterator.each_batch(of: SCAN_BATCH_SIZE) do |batch|
last_updated_record = batch.last
@updated_count += purge(batch)
store_last_purged_tuple(last_updated_record.created_at, last_updated_record.id) if last_updated_record
break if @updated_count >= MAX_STALE_SCANS_SIZE
end
end
......@@ -39,5 +61,16 @@ def execute
def purge(scan_batch)
scan_batch.update_all(status: :purged)
end
# Normal to string methods for dates don't include the split seconds that rails usually includes in queries.
# Without them, it's possible to still match on the last processed record instead of the one after it.
def store_last_purged_tuple(created_at, id)
Gitlab::Redis::SharedState.with do |redis|
redis.hset(LAST_PURGED_SCAN_TUPLE, {
"created_at" => created_at.strftime("%Y-%m-%d %H:%M:%S.%6N"),
"id" => id
}, ex: LAST_PURGED_SCAN_TUPLE_TTL)
end
end
end
end
......@@ -4,9 +4,13 @@
RSpec.describe Security::PurgeScansService, feature_category: :vulnerability_management do
describe 'class interface' do
describe '.purge_stale_records' do
let(:stale_scan) { create(:security_scan, created_at: 91.days.ago) }
let(:fresh_scan) { create(:security_scan) }
describe '.purge_stale_records', :clean_gitlab_redis_shared_state do
let!(:stale_scan) { create(:security_scan, created_at: 92.days.ago) }
let(:stale_scan_tuple_cache) do
{ "created_at" => stale_scan.created_at.strftime("%Y-%m-%d %H:%M:%S.%6N"), "id" => stale_scan.id.to_s }
end
let!(:fresh_scan) { create(:security_scan) }
subject(:purge_stale_records) { described_class.purge_stale_records }
......@@ -14,6 +18,36 @@
expect { purge_stale_records }.to change { stale_scan.reload.status }.to("purged")
.and not_change { fresh_scan.reload.status }
end
describe 'dead tuple optimisation' do
def cached_tuple
Gitlab::Redis::SharedState.with do |redis|
redis.hgetall(described_class::LAST_PURGED_SCAN_TUPLE)
end
end
it 'caches a previous purged tuple' do
expect { purge_stale_records }.to change {
cached_tuple
}.from({}).to(stale_scan_tuple_cache.merge("ex" => described_class::LAST_PURGED_SCAN_TUPLE_TTL.to_s))
end
context 'when a previous purged tuple is cached' do
let!(:second_stale_scan) { create(:security_scan, created_at: 91.days.ago) }
before do
Gitlab::Redis::SharedState.with do |redis|
redis.hset(described_class::LAST_PURGED_SCAN_TUPLE, stale_scan_tuple_cache,
ex: described_class::LAST_PURGED_SCAN_TUPLE_TTL)
end
end
it 'uses the cached tuple to scope the query and skip already checked values' do
expect { purge_stale_records }.to change { second_stale_scan.reload.status }.to("purged")
.and not_change { stale_scan.reload.status }
end
end
end
end
describe '.purge_by_build_ids' do
......
0% 加载中 .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册