From 2293205d7cbccc433667de46510a8857691c4c5e Mon Sep 17 00:00:00 2001 From: charlie ablett <cablett@gitlab.com> Date: Wed, 7 Feb 2024 09:21:13 +0000 Subject: [PATCH] Use DB duration_in_milliseconds for aggregated VSA --- .../cycle_analytics/stage_event_model.rb | 39 +++++- .../vsa_duration_from_db.yml | 9 ++ .../aggregated/base_query_builder_spec.rb | 29 ++++- .../data_for_duration_chart_spec.rb | 59 +++++++-- .../shared_stage_shared_examples.rb | 120 +++++++++++++----- .../aggregated/base_query_builder.rb | 18 ++- .../aggregated/records_fetcher.rb | 1 - .../aggregated/stage_query_helpers.rb | 24 +++- 8 files changed, 242 insertions(+), 57 deletions(-) create mode 100644 config/feature_flags/gitlab_com_derisk/vsa_duration_from_db.yml diff --git a/app/models/concerns/analytics/cycle_analytics/stage_event_model.rb b/app/models/concerns/analytics/cycle_analytics/stage_event_model.rb index 618ea7d979cb9..3fa5a6372a47c 100644 --- a/app/models/concerns/analytics/cycle_analytics/stage_event_model.rb +++ b/app/models/concerns/analytics/cycle_analytics/stage_event_model.rb @@ -29,7 +29,21 @@ module StageEventModel :start_event_timestamp => { order_expression: arel_order(arel_table[:start_event_timestamp], direction), distinct: false } ) end - scope :order_by_duration, -> (direction) do + scope :order_by_end_event_with_db_duration, -> (direction) do + # ORDER BY end_event_timestamp, merge_request_id/issue_id, start_event_timestamp + # start_event_timestamp must be included in the ORDER BY clause for the duration + # calculation to work: SELECT end_event_timestamp - start_event_timestamp + keyset_order( + :end_event_timestamp => { order_expression: arel_order(arel_table[:end_event_timestamp], direction), distinct: false, nullable: direction == :asc ? :nulls_last : :nulls_first }, + issuable_id_column => { order_expression: arel_order(arel_table[issuable_id_column], direction), distinct: true }, + :start_event_timestamp => { order_expression: arel_order(arel_table[:start_event_timestamp], direction), distinct: false }, + :duration_in_milliseconds => { + order_expression: arel_order(arel_table[:duration_in_milliseconds], direction), + distinct: false, nullable: direction == :asc ? :nulls_last : :nulls_first, sql_type: 'bigint' + } + ) + end + scope :order_by_calculated_duration, -> (direction) do # ORDER BY EXTRACT('epoch', end_event_timestamp - start_event_timestamp) duration = Arel::Nodes::Subtraction.new( arel_table[:end_event_timestamp], @@ -47,6 +61,20 @@ module StageEventModel :start_event_timestamp => { order_expression: arel_order(arel_table[:start_event_timestamp], direction), distinct: true } ) end + scope :order_by_db_duration, -> (direction) do + # start_event_timestamp and end_event_timestamp do not really influence the order, + # but are included so that they are part of the returned result, for example when + # using Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher + keyset_order( + :duration_in_milliseconds => { + order_expression: arel_order(arel_table[:duration_in_milliseconds], direction), + distinct: false, nullable: direction == :asc ? :nulls_last : :nulls_first, sql_type: 'bigint' + }, + issuable_id_column => { order_expression: arel_order(arel_table[issuable_id_column], direction), distinct: true }, + :end_event_timestamp => { order_expression: arel_order(arel_table[:end_event_timestamp], direction), distinct: true }, + :start_event_timestamp => { order_expression: arel_order(arel_table[:start_event_timestamp], direction), distinct: true } + ) + end end def issuable_id @@ -54,6 +82,15 @@ def issuable_id end def total_time + # Temporary until the vsa_duration_from_db feature flag is cleaned up. After that + # we can use the duration_in_milliseconds attribute. + # + # - total_time attribute: calculated from end_event_timestamp - start_event_timestamp + # - duration_in_milliseconds: newly introduced column in the table + duration_in_ms = read_attribute(:duration_in_milliseconds) + # cast to seconds + return duration_in_ms.fdiv(1000) if duration_in_ms + read_attribute(:total_time) || (end_event_timestamp - start_event_timestamp).to_f end diff --git a/config/feature_flags/gitlab_com_derisk/vsa_duration_from_db.yml b/config/feature_flags/gitlab_com_derisk/vsa_duration_from_db.yml new file mode 100644 index 0000000000000..7de862ff49a40 --- /dev/null +++ b/config/feature_flags/gitlab_com_derisk/vsa_duration_from_db.yml @@ -0,0 +1,9 @@ +--- +name: vsa_duration_from_db +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/432574 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/142618 +rollout_issue_url: https://gitlab.com/gitlab-com/gl-infra/production/-/issues/17476 +milestone: '16.9' +group: group::optimize +type: gitlab_com_derisk +default_enabled: false diff --git a/ee/spec/lib/ee/gitlab/analytics/cycle_analytics/aggregated/base_query_builder_spec.rb b/ee/spec/lib/ee/gitlab/analytics/cycle_analytics/aggregated/base_query_builder_spec.rb index a4c1562ae6248..845a74e8a1f27 100644 --- a/ee/spec/lib/ee/gitlab/analytics/cycle_analytics/aggregated/base_query_builder_spec.rb +++ b/ee/spec/lib/ee/gitlab/analytics/cycle_analytics/aggregated/base_query_builder_spec.rb @@ -39,7 +39,8 @@ start_event_timestamp: 4.weeks.ago, end_event_timestamp: 1.week.ago, sprint_id: iteration.id, - issue_id: issue_with_iteration.id + issue_id: issue_with_iteration.id, + duration_in_milliseconds: 3000 ) end @@ -53,7 +54,8 @@ end_event_timestamp: 1.week.ago, milestone_id: milestone.id, weight: 5, - issue_id: issue_with_epic.id + issue_id: issue_with_epic.id, + duration_in_milliseconds: 57000 ) end @@ -62,7 +64,8 @@ stage_event_hash_id: stage.stage_event_hash_id, group_id: other_group.id, project_id: other_project.id, - issue_id: create(:issue, project: other_project).id + issue_id: create(:issue, project: other_project).id, + duration_in_milliseconds: 5000 ) end @@ -237,10 +240,24 @@ expect(issue_ids).to eq([stage_event_2.issue_id, stage_event_1.issue_id]) end - it 'returns the items in order (by duration)' do - params[:sort] = :duration + context 'when feature flag vsa_duration_from_db is disabled' do + before do + stub_feature_flags(vsa_duration_from_db: false) + end + + it 'returns the items in order (by duration calculated by end event time - start event time)' do + params[:sort] = :duration - expect(issue_ids).to eq([stage_event_1.issue_id, stage_event_2.issue_id]) + expect(issue_ids).to eq([stage_event_1.issue_id, stage_event_2.issue_id]) + end + end + + context 'when feature flag vsa_duration_from_db is enabled' do + it 'returns the items in order (by db duration value)' do + params[:sort] = :duration + + expect(issue_ids).to eq([stage_event_2.issue_id, stage_event_1.issue_id]) + end end it 'handles the project_ids filter' do diff --git a/ee/spec/lib/gitlab/analytics/cycle_analytics/aggregated/data_for_duration_chart_spec.rb b/ee/spec/lib/gitlab/analytics/cycle_analytics/aggregated/data_for_duration_chart_spec.rb index 3a50966937bfb..b1a050ae43e6f 100644 --- a/ee/spec/lib/gitlab/analytics/cycle_analytics/aggregated/data_for_duration_chart_spec.rb +++ b/ee/spec/lib/gitlab/analytics/cycle_analytics/aggregated/data_for_duration_chart_spec.rb @@ -15,18 +15,57 @@ .average_by_day end - it 'calculates the daily average stage duration' do - end_timestamp_1 = Time.zone.local(2020, 5, 6, 12, 0) - end_timestamp_2 = Time.zone.local(2020, 5, 15, 10, 0) + describe 'calculating the daily average stage duration', :aggregate_failures do + let_it_be(:end_timestamp_1) { Time.zone.local(2020, 5, 6, 12, 0) } + let_it_be(:end_timestamp_2) { Time.zone.local(2020, 5, 15, 10, 0) } - create(:cycle_analytics_issue_stage_event, issue_id: issue_1.id, start_event_timestamp: end_timestamp_1 - 3.days, end_event_timestamp: end_timestamp_1) # 3 days - create(:cycle_analytics_issue_stage_event, issue_id: issue_2.id, start_event_timestamp: end_timestamp_2 - 3.days, end_event_timestamp: end_timestamp_2) # 3 days - create(:cycle_analytics_issue_stage_event, issue_id: issue_3.id, start_event_timestamp: end_timestamp_2 - 1.day, end_event_timestamp: end_timestamp_2) # 1 day + # 1 day in milliseconds = 86_400_000 - average_two_days_ago = result[0] - average_today = result[1] + let_it_be(:event_1) do + # 3 days between start and end events + create(:cycle_analytics_issue_stage_event, issue_id: issue_1.id, + start_event_timestamp: end_timestamp_1 - 3.days, + end_event_timestamp: end_timestamp_1, duration_in_milliseconds: 100_000_000) + end - expect(average_two_days_ago).to have_attributes(date: end_timestamp_1.to_date, average_duration_in_seconds: 3.days) - expect(average_today).to have_attributes(date: end_timestamp_2.to_date, average_duration_in_seconds: 2.days) + let_it_be(:event_2) do + # 3 days between start and end events + create(:cycle_analytics_issue_stage_event, issue_id: issue_2.id, + start_event_timestamp: end_timestamp_2 - 3.days, + end_event_timestamp: end_timestamp_2, duration_in_milliseconds: 50_000_000 + ) + end + + let_it_be(:event3) do + # 1 day between start and end events + create(:cycle_analytics_issue_stage_event, issue_id: issue_3.id, + start_event_timestamp: end_timestamp_2 - 1.day, + end_event_timestamp: end_timestamp_2, duration_in_milliseconds: 10_000 + ) + end + + context 'when feature flag vsa_duration_from_db is enabled' do + it 'calculates duration from database column duration_in_milliseconds' do + average_two_days_ago = result[0] + average_today = result[1] + + expect(average_two_days_ago).to have_attributes(date: end_timestamp_1.to_date, average_duration_in_seconds: 100000) + expect(average_today).to have_attributes(date: end_timestamp_2.to_date, average_duration_in_seconds: 25005) + end + end + + context 'when feature flag vsa_duration_from_db is disabled' do + before do + stub_feature_flags(vsa_duration_from_db: false) + end + + it 'calculates duration from start and end events' do + average_two_days_ago = result[0] + average_today = result[1] + + expect(average_two_days_ago).to have_attributes(date: end_timestamp_1.to_date, average_duration_in_seconds: 3.days) + expect(average_today).to have_attributes(date: end_timestamp_2.to_date, average_duration_in_seconds: 2.days) + end + end end end diff --git a/ee/spec/support/shared_examples/controllers/analytics/cycle_analytics/shared_stage_shared_examples.rb b/ee/spec/support/shared_examples/controllers/analytics/cycle_analytics/shared_stage_shared_examples.rb index 1377b250a10b3..688c008197253 100644 --- a/ee/spec/support/shared_examples/controllers/analytics/cycle_analytics/shared_stage_shared_examples.rb +++ b/ee/spec/support/shared_examples/controllers/analytics/cycle_analytics/shared_stage_shared_examples.rb @@ -101,45 +101,97 @@ params.merge!(sort: 'duration', direction: 'asc') end - it 'accepts sort params' do - travel_to DateTime.new(2019, 1, 5) do - event_1 = create( - :cycle_analytics_merge_request_stage_event, - stage_event_hash_id: stage.stage_event_hash_id, - group_id: stage.namespace.id, - merge_request_id: 1, - start_event_timestamp: Time.current, - end_event_timestamp: 20.days.from_now - ) - - event_2 = create( - :cycle_analytics_merge_request_stage_event, - stage_event_hash_id: stage.stage_event_hash_id, - group_id: stage.namespace.id, - merge_request_id: 2, - start_event_timestamp: Time.current, - end_event_timestamp: 1.day.from_now - ) - - event_3 = create( - :cycle_analytics_merge_request_stage_event, - stage_event_hash_id: stage.stage_event_hash_id, - group_id: stage.namespace.id, - merge_request_id: 3, - start_event_timestamp: Time.current, - end_event_timestamp: 3.days.from_now - ) - - expect_next_instance_of(Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher) do |records_fetcher| - records_fetcher.serialized_records do |raw_active_record_scope| - expect(raw_active_record_scope.pluck(:merge_request_id)).to eq([event_2.merge_request_id, event_3.merge_request_id, event_1.merge_request_id]) + context 'when feature flag vsa_duration_from_db is enabled' do + it 'accepts sort params and orders the items (by duration in database)' do + travel_to DateTime.new(2019, 1, 5) do + event_1 = create( + :cycle_analytics_merge_request_stage_event, + stage_event_hash_id: stage.stage_event_hash_id, + group_id: stage.namespace.id, + merge_request_id: 1, + start_event_timestamp: Time.current, + end_event_timestamp: 20.days.from_now, + duration_in_milliseconds: 5000 + ) + + event_2 = create( + :cycle_analytics_merge_request_stage_event, + stage_event_hash_id: stage.stage_event_hash_id, + group_id: stage.namespace.id, + merge_request_id: 2, + start_event_timestamp: Time.current, + end_event_timestamp: 1.day.from_now, + duration_in_milliseconds: 3000 + ) + + event_3 = create( + :cycle_analytics_merge_request_stage_event, + stage_event_hash_id: stage.stage_event_hash_id, + group_id: stage.namespace.id, + merge_request_id: 3, + start_event_timestamp: Time.current, + end_event_timestamp: 3.days.from_now, + duration_in_milliseconds: 15000 + ) + + expect_next_instance_of(Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher) do |records_fetcher| + records_fetcher.serialized_records do |raw_active_record_scope| + expect(raw_active_record_scope.pluck(:merge_request_id)).to eq([event_2.merge_request_id, event_1.merge_request_id, event_3.merge_request_id]) + end end end + + subject + + expect(response).to have_gitlab_http_status(:ok) end + end - subject + context 'when feature flag vsa_duration_from_db is disabled' do + before do + stub_feature_flags(vsa_duration_from_db: false) + end - expect(response).to have_gitlab_http_status(:ok) + it 'accepts sort params and orders the items (by duration calculated by end event time - start event time)' do + travel_to DateTime.new(2019, 1, 5) do + event_1 = create( + :cycle_analytics_merge_request_stage_event, + stage_event_hash_id: stage.stage_event_hash_id, + group_id: stage.namespace.id, + merge_request_id: 1, + start_event_timestamp: Time.current, + end_event_timestamp: 20.days.from_now + ) + + event_2 = create( + :cycle_analytics_merge_request_stage_event, + stage_event_hash_id: stage.stage_event_hash_id, + group_id: stage.namespace.id, + merge_request_id: 2, + start_event_timestamp: Time.current, + end_event_timestamp: 1.day.from_now + ) + + event_3 = create( + :cycle_analytics_merge_request_stage_event, + stage_event_hash_id: stage.stage_event_hash_id, + group_id: stage.namespace.id, + merge_request_id: 3, + start_event_timestamp: Time.current, + end_event_timestamp: 3.days.from_now + ) + + expect_next_instance_of(Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher) do |records_fetcher| + records_fetcher.serialized_records do |raw_active_record_scope| + expect(raw_active_record_scope.pluck(:merge_request_id)).to eq([event_2.merge_request_id, event_3.merge_request_id, event_1.merge_request_id]) + end + end + end + + subject + + expect(response).to have_gitlab_http_status(:ok) + end end end diff --git a/lib/gitlab/analytics/cycle_analytics/aggregated/base_query_builder.rb b/lib/gitlab/analytics/cycle_analytics/aggregated/base_query_builder.rb index fc0e4ab5a0de4..dfe3b244e2dbc 100644 --- a/lib/gitlab/analytics/cycle_analytics/aggregated/base_query_builder.rb +++ b/lib/gitlab/analytics/cycle_analytics/aggregated/base_query_builder.rb @@ -41,7 +41,23 @@ def build_sorted_query direction = params[:direction] || :desc if params[:sort] == :duration - build.order_by_duration(direction) + order_by_duration(direction) + else + order_by_end_event(direction) + end + end + + def order_by_duration(direction) + if Feature.enabled?(:vsa_duration_from_db, type: :gitlab_com_derisk) + build.order_by_db_duration(direction) + else + build.order_by_calculated_duration(direction) + end + end + + def order_by_end_event(direction) + if Feature.enabled?(:vsa_duration_from_db, type: :gitlab_com_derisk) + build.order_by_end_event_with_db_duration(direction) else build.order_by_end_event(direction) end diff --git a/lib/gitlab/analytics/cycle_analytics/aggregated/records_fetcher.rb b/lib/gitlab/analytics/cycle_analytics/aggregated/records_fetcher.rb index 6a1529ade9298..d482cbac9fcb2 100644 --- a/lib/gitlab/analytics/cycle_analytics/aggregated/records_fetcher.rb +++ b/lib/gitlab/analytics/cycle_analytics/aggregated/records_fetcher.rb @@ -42,7 +42,6 @@ def serialized_records # InOperatorOptimization::QueryBuilder only columns # used in ORDER BY statement would be selected by Arel.star operation selections = [stage_event_model.arel_table[Arel.star]] - selections << duration_in_seconds.as('total_time') if params[:sort] != :duration # duration sorting already exposes this data records = limited_query.select(*selections) diff --git a/lib/gitlab/analytics/cycle_analytics/aggregated/stage_query_helpers.rb b/lib/gitlab/analytics/cycle_analytics/aggregated/stage_query_helpers.rb index 1d041e7627700..c68b4c9579cbe 100644 --- a/lib/gitlab/analytics/cycle_analytics/aggregated/stage_query_helpers.rb +++ b/lib/gitlab/analytics/cycle_analytics/aggregated/stage_query_helpers.rb @@ -20,10 +20,7 @@ def duration query.model.arel_table[:start_event_timestamp] ) else - Arel::Nodes::Subtraction.new( - query.model.arel_table[:end_event_timestamp], - query.model.arel_table[:start_event_timestamp] - ) + calculate_duration end end @@ -34,6 +31,25 @@ def in_progress? def duration_in_seconds(duration_expression = duration) Arel::Nodes::NamedFunction.new('CAST', [Arel::Nodes::Extract.new(duration_expression, :epoch).as('double precision')]) end + + private + + def calculate_duration + if Feature.enabled?(:vsa_duration_from_db, type: :gitlab_com_derisk) + # Cast the duration_in_milliseconds bigint column to interval. We divide the + # duration by 1000 thus we get a double precision number (seconds). The division is necessary + # to avoid query errors for very long duration values. + # Reason: Interval is parsed as a 32 bit integer and when the duration_in_milliseconds has + # a very high value the parsing will fail. + column = "#{query.model.quoted_table_name}.duration_in_milliseconds / 1000.0 || ' seconds'" + Arel::Nodes::NamedFunction.new('CAST', [Arel.sql("(#{column}) AS interval")]) + else + Arel::Nodes::Subtraction.new( + query.model.arel_table[:end_event_timestamp], + query.model.arel_table[:start_event_timestamp] + ) + end + end end end end -- GitLab