From dfbf02a0b78000ec9d9115b8953ac969f0038e3c Mon Sep 17 00:00:00 2001 From: Adam Hegyi <ahegyi@gitlab.com> Date: Thu, 15 Feb 2024 08:03:39 +0100 Subject: [PATCH] Enable VSA cumulative duration calculation This change enables the value stream analytics cumulative duration calculation for label-based stages. Changelog: added EE: true --- .../cycle_analytics/stage_event_model.rb | 29 +---- .../vsa_duration_from_db.yml | 9 -- .../group/value_stream_analytics/index.md | 5 +- ..._cumulative_label_duration_calculation.yml | 2 +- .../aggregated/base_query_builder_spec.rb | 20 +-- .../data_for_duration_chart_spec.rb | 26 +--- .../shared_stage_shared_examples.rb | 123 ++++++------------ .../aggregated/base_query_builder.rb | 16 --- .../aggregated/stage_query_helpers.rb | 21 +-- .../cycle_analytics/issue_stage_events.rb | 5 + .../merge_request_stage_events.rb | 5 + .../stage_event_model_examples.rb | 12 +- 12 files changed, 68 insertions(+), 205 deletions(-) delete mode 100644 config/feature_flags/gitlab_com_derisk/vsa_duration_from_db.yml diff --git a/app/models/concerns/analytics/cycle_analytics/stage_event_model.rb b/app/models/concerns/analytics/cycle_analytics/stage_event_model.rb index 8f725ab6be947..3c0ee6d8090cf 100644 --- a/app/models/concerns/analytics/cycle_analytics/stage_event_model.rb +++ b/app/models/concerns/analytics/cycle_analytics/stage_event_model.rb @@ -49,24 +49,6 @@ module StageEventModel } ) end - scope :order_by_calculated_duration, -> (direction) do - # ORDER BY EXTRACT('epoch', end_event_timestamp - start_event_timestamp) - duration = Arel::Nodes::Subtraction.new( - arel_table[:end_event_timestamp], - arel_table[:start_event_timestamp] - ) - duration_in_seconds = Arel::Nodes::Extract.new(duration, :epoch) - - # start_event_timestamp and end_event_timestamp do not really influence the order, - # but are included so that they are part of the returned result, for example when - # using Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher - keyset_order( - :total_time => { order_expression: arel_order(duration_in_seconds, direction), distinct: false, sql_type: 'double precision' }, - issuable_id_column => { order_expression: arel_order(arel_table[issuable_id_column], direction), distinct: true }, - :end_event_timestamp => { order_expression: arel_order(arel_table[:end_event_timestamp], direction), distinct: true }, - :start_event_timestamp => { order_expression: arel_order(arel_table[:start_event_timestamp], direction), distinct: true } - ) - end scope :order_by_db_duration, -> (direction) do # start_event_timestamp and end_event_timestamp do not really influence the order, # but are included so that they are part of the returned result, for example when @@ -88,16 +70,7 @@ def issuable_id end def total_time - # Temporary until the vsa_duration_from_db feature flag is cleaned up. After that - # we can use the duration_in_milliseconds attribute. - # - # - total_time attribute: calculated from end_event_timestamp - start_event_timestamp - # - duration_in_milliseconds: newly introduced column in the table - duration_in_ms = read_attribute(:duration_in_milliseconds) - # cast to seconds - return duration_in_ms.fdiv(1000) if duration_in_ms - - read_attribute(:total_time) || (end_event_timestamp - start_event_timestamp).to_f + duration_in_milliseconds&.fdiv(1000) end class_methods do diff --git a/config/feature_flags/gitlab_com_derisk/vsa_duration_from_db.yml b/config/feature_flags/gitlab_com_derisk/vsa_duration_from_db.yml deleted file mode 100644 index 7de862ff49a40..0000000000000 --- a/config/feature_flags/gitlab_com_derisk/vsa_duration_from_db.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -name: vsa_duration_from_db -feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/432574 -introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/142618 -rollout_issue_url: https://gitlab.com/gitlab-com/gl-infra/production/-/issues/17476 -milestone: '16.9' -group: group::optimize -type: gitlab_com_derisk -default_enabled: false diff --git a/doc/user/group/value_stream_analytics/index.md b/doc/user/group/value_stream_analytics/index.md index 6f820c8609177..33ca675f53054 100644 --- a/doc/user/group/value_stream_analytics/index.md +++ b/doc/user/group/value_stream_analytics/index.md @@ -202,9 +202,10 @@ Keep in mind the following observations related to this example: #### Cumulative label event duration > - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/432576) in GitLab 16.9 [with a flag](../../../administration/feature_flags.md) named `enable_vsa_cumulative_label_duration_calculation` and `vsa_duration_from_db`. These feature flags are disabled by default. +> - [Enabled](https://gitlab.com/gitlab-org/gitlab/-/issues/432576) in GitLab 16.10 by default. FLAG: -On self-managed GitLab, by default this feature is not available. To make it available, an administrator can [enable the feature flag](../../../administration/feature_flags.md) named `enable_vsa_cumulative_label_duration_calculation` and `vsa_duration_from_db`. On GitLab.com, this feature is available. +On self-managed GitLab, by default this feature is available. To disable it, an administrator can [disable the feature flag](../../../administration/feature_flags.md) named `enable_vsa_cumulative_label_duration_calculation`. On GitLab.com, this feature is available. With this feature, value stream analytics measures the duration of repetitive events for label-based stages. You should configure label removal or addition events for both start and end events. @@ -219,7 +220,7 @@ With the original calculation method, the duration is five hours (from 9:00 to 1 With cumulative label event duration calculation enabled, the duration is three hours (9:00 to 10:00 and 12:00 to 14:00). NOTE: -When you upgrade your GitLab version and enable the feature flag `enable_vsa_cumulative_label_duration_calculation`, existing label-based value stream analytics stages are automatically reaggregated using the background aggregation process. +When you upgrade your GitLab version to 16.10 (or to a higher version), existing label-based value stream analytics stages are automatically reaggregated using the background aggregation process. ### How value stream analytics identifies the production environment diff --git a/ee/config/feature_flags/beta/enable_vsa_cumulative_label_duration_calculation.yml b/ee/config/feature_flags/beta/enable_vsa_cumulative_label_duration_calculation.yml index b5115c7d41325..d591d41ca927f 100644 --- a/ee/config/feature_flags/beta/enable_vsa_cumulative_label_duration_calculation.yml +++ b/ee/config/feature_flags/beta/enable_vsa_cumulative_label_duration_calculation.yml @@ -6,4 +6,4 @@ rollout_issue_url: https://gitlab.com/gitlab-com/gl-infra/production/-/issues/17 milestone: '16.9' group: group::optimize type: beta -default_enabled: false +default_enabled: true diff --git a/ee/spec/lib/ee/gitlab/analytics/cycle_analytics/aggregated/base_query_builder_spec.rb b/ee/spec/lib/ee/gitlab/analytics/cycle_analytics/aggregated/base_query_builder_spec.rb index 845a74e8a1f27..50f5195e3efad 100644 --- a/ee/spec/lib/ee/gitlab/analytics/cycle_analytics/aggregated/base_query_builder_spec.rb +++ b/ee/spec/lib/ee/gitlab/analytics/cycle_analytics/aggregated/base_query_builder_spec.rb @@ -240,24 +240,10 @@ expect(issue_ids).to eq([stage_event_2.issue_id, stage_event_1.issue_id]) end - context 'when feature flag vsa_duration_from_db is disabled' do - before do - stub_feature_flags(vsa_duration_from_db: false) - end - - it 'returns the items in order (by duration calculated by end event time - start event time)' do - params[:sort] = :duration - - expect(issue_ids).to eq([stage_event_1.issue_id, stage_event_2.issue_id]) - end - end - - context 'when feature flag vsa_duration_from_db is enabled' do - it 'returns the items in order (by db duration value)' do - params[:sort] = :duration + it 'returns the items in order (by db duration value)' do + params[:sort] = :duration - expect(issue_ids).to eq([stage_event_2.issue_id, stage_event_1.issue_id]) - end + expect(issue_ids).to eq([stage_event_2.issue_id, stage_event_1.issue_id]) end it 'handles the project_ids filter' do diff --git a/ee/spec/lib/gitlab/analytics/cycle_analytics/aggregated/data_for_duration_chart_spec.rb b/ee/spec/lib/gitlab/analytics/cycle_analytics/aggregated/data_for_duration_chart_spec.rb index b1a050ae43e6f..af1078d1017e1 100644 --- a/ee/spec/lib/gitlab/analytics/cycle_analytics/aggregated/data_for_duration_chart_spec.rb +++ b/ee/spec/lib/gitlab/analytics/cycle_analytics/aggregated/data_for_duration_chart_spec.rb @@ -44,28 +44,12 @@ ) end - context 'when feature flag vsa_duration_from_db is enabled' do - it 'calculates duration from database column duration_in_milliseconds' do - average_two_days_ago = result[0] - average_today = result[1] + it 'calculates duration from database column duration_in_milliseconds' do + average_two_days_ago = result[0] + average_today = result[1] - expect(average_two_days_ago).to have_attributes(date: end_timestamp_1.to_date, average_duration_in_seconds: 100000) - expect(average_today).to have_attributes(date: end_timestamp_2.to_date, average_duration_in_seconds: 25005) - end - end - - context 'when feature flag vsa_duration_from_db is disabled' do - before do - stub_feature_flags(vsa_duration_from_db: false) - end - - it 'calculates duration from start and end events' do - average_two_days_ago = result[0] - average_today = result[1] - - expect(average_two_days_ago).to have_attributes(date: end_timestamp_1.to_date, average_duration_in_seconds: 3.days) - expect(average_today).to have_attributes(date: end_timestamp_2.to_date, average_duration_in_seconds: 2.days) - end + expect(average_two_days_ago).to have_attributes(date: end_timestamp_1.to_date, average_duration_in_seconds: 100000) + expect(average_today).to have_attributes(date: end_timestamp_2.to_date, average_duration_in_seconds: 25005) end end end diff --git a/ee/spec/support/shared_examples/controllers/analytics/cycle_analytics/shared_stage_shared_examples.rb b/ee/spec/support/shared_examples/controllers/analytics/cycle_analytics/shared_stage_shared_examples.rb index 688c008197253..85f10dbf2df0b 100644 --- a/ee/spec/support/shared_examples/controllers/analytics/cycle_analytics/shared_stage_shared_examples.rb +++ b/ee/spec/support/shared_examples/controllers/analytics/cycle_analytics/shared_stage_shared_examples.rb @@ -101,97 +101,48 @@ params.merge!(sort: 'duration', direction: 'asc') end - context 'when feature flag vsa_duration_from_db is enabled' do - it 'accepts sort params and orders the items (by duration in database)' do - travel_to DateTime.new(2019, 1, 5) do - event_1 = create( - :cycle_analytics_merge_request_stage_event, - stage_event_hash_id: stage.stage_event_hash_id, - group_id: stage.namespace.id, - merge_request_id: 1, - start_event_timestamp: Time.current, - end_event_timestamp: 20.days.from_now, - duration_in_milliseconds: 5000 - ) - - event_2 = create( - :cycle_analytics_merge_request_stage_event, - stage_event_hash_id: stage.stage_event_hash_id, - group_id: stage.namespace.id, - merge_request_id: 2, - start_event_timestamp: Time.current, - end_event_timestamp: 1.day.from_now, - duration_in_milliseconds: 3000 - ) - - event_3 = create( - :cycle_analytics_merge_request_stage_event, - stage_event_hash_id: stage.stage_event_hash_id, - group_id: stage.namespace.id, - merge_request_id: 3, - start_event_timestamp: Time.current, - end_event_timestamp: 3.days.from_now, - duration_in_milliseconds: 15000 - ) - - expect_next_instance_of(Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher) do |records_fetcher| - records_fetcher.serialized_records do |raw_active_record_scope| - expect(raw_active_record_scope.pluck(:merge_request_id)).to eq([event_2.merge_request_id, event_1.merge_request_id, event_3.merge_request_id]) - end + it 'accepts sort params and orders the items (by duration in database)' do + travel_to DateTime.new(2019, 1, 5) do + event_1 = create( + :cycle_analytics_merge_request_stage_event, + stage_event_hash_id: stage.stage_event_hash_id, + group_id: stage.namespace.id, + merge_request_id: 1, + start_event_timestamp: Time.current, + end_event_timestamp: 20.days.from_now, + duration_in_milliseconds: 5000 + ) + + event_2 = create( + :cycle_analytics_merge_request_stage_event, + stage_event_hash_id: stage.stage_event_hash_id, + group_id: stage.namespace.id, + merge_request_id: 2, + start_event_timestamp: Time.current, + end_event_timestamp: 1.day.from_now, + duration_in_milliseconds: 3000 + ) + + event_3 = create( + :cycle_analytics_merge_request_stage_event, + stage_event_hash_id: stage.stage_event_hash_id, + group_id: stage.namespace.id, + merge_request_id: 3, + start_event_timestamp: Time.current, + end_event_timestamp: 3.days.from_now, + duration_in_milliseconds: 15000 + ) + + expect_next_instance_of(Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher) do |records_fetcher| + records_fetcher.serialized_records do |raw_active_record_scope| + expect(raw_active_record_scope.pluck(:merge_request_id)).to eq([event_2.merge_request_id, event_1.merge_request_id, event_3.merge_request_id]) end end - - subject - - expect(response).to have_gitlab_http_status(:ok) - end - end - - context 'when feature flag vsa_duration_from_db is disabled' do - before do - stub_feature_flags(vsa_duration_from_db: false) end - it 'accepts sort params and orders the items (by duration calculated by end event time - start event time)' do - travel_to DateTime.new(2019, 1, 5) do - event_1 = create( - :cycle_analytics_merge_request_stage_event, - stage_event_hash_id: stage.stage_event_hash_id, - group_id: stage.namespace.id, - merge_request_id: 1, - start_event_timestamp: Time.current, - end_event_timestamp: 20.days.from_now - ) - - event_2 = create( - :cycle_analytics_merge_request_stage_event, - stage_event_hash_id: stage.stage_event_hash_id, - group_id: stage.namespace.id, - merge_request_id: 2, - start_event_timestamp: Time.current, - end_event_timestamp: 1.day.from_now - ) - - event_3 = create( - :cycle_analytics_merge_request_stage_event, - stage_event_hash_id: stage.stage_event_hash_id, - group_id: stage.namespace.id, - merge_request_id: 3, - start_event_timestamp: Time.current, - end_event_timestamp: 3.days.from_now - ) - - expect_next_instance_of(Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher) do |records_fetcher| - records_fetcher.serialized_records do |raw_active_record_scope| - expect(raw_active_record_scope.pluck(:merge_request_id)).to eq([event_2.merge_request_id, event_3.merge_request_id, event_1.merge_request_id]) - end - end - end - - subject + subject - expect(response).to have_gitlab_http_status(:ok) - end + expect(response).to have_gitlab_http_status(:ok) end end diff --git a/lib/gitlab/analytics/cycle_analytics/aggregated/base_query_builder.rb b/lib/gitlab/analytics/cycle_analytics/aggregated/base_query_builder.rb index dfe3b244e2dbc..085f722a66cb8 100644 --- a/lib/gitlab/analytics/cycle_analytics/aggregated/base_query_builder.rb +++ b/lib/gitlab/analytics/cycle_analytics/aggregated/base_query_builder.rb @@ -41,25 +41,9 @@ def build_sorted_query direction = params[:direction] || :desc if params[:sort] == :duration - order_by_duration(direction) - else - order_by_end_event(direction) - end - end - - def order_by_duration(direction) - if Feature.enabled?(:vsa_duration_from_db, type: :gitlab_com_derisk) build.order_by_db_duration(direction) else - build.order_by_calculated_duration(direction) - end - end - - def order_by_end_event(direction) - if Feature.enabled?(:vsa_duration_from_db, type: :gitlab_com_derisk) build.order_by_end_event_with_db_duration(direction) - else - build.order_by_end_event(direction) end end diff --git a/lib/gitlab/analytics/cycle_analytics/aggregated/stage_query_helpers.rb b/lib/gitlab/analytics/cycle_analytics/aggregated/stage_query_helpers.rb index c68b4c9579cbe..d5fca419ddc4b 100644 --- a/lib/gitlab/analytics/cycle_analytics/aggregated/stage_query_helpers.rb +++ b/lib/gitlab/analytics/cycle_analytics/aggregated/stage_query_helpers.rb @@ -35,20 +35,13 @@ def duration_in_seconds(duration_expression = duration) private def calculate_duration - if Feature.enabled?(:vsa_duration_from_db, type: :gitlab_com_derisk) - # Cast the duration_in_milliseconds bigint column to interval. We divide the - # duration by 1000 thus we get a double precision number (seconds). The division is necessary - # to avoid query errors for very long duration values. - # Reason: Interval is parsed as a 32 bit integer and when the duration_in_milliseconds has - # a very high value the parsing will fail. - column = "#{query.model.quoted_table_name}.duration_in_milliseconds / 1000.0 || ' seconds'" - Arel::Nodes::NamedFunction.new('CAST', [Arel.sql("(#{column}) AS interval")]) - else - Arel::Nodes::Subtraction.new( - query.model.arel_table[:end_event_timestamp], - query.model.arel_table[:start_event_timestamp] - ) - end + # Cast the duration_in_milliseconds bigint column to interval. We divide the + # duration by 1000 thus we get a double precision number (seconds). The division is necessary + # to avoid query errors for very long duration values. + # Reason: Interval is parsed as a 32 bit integer and when the duration_in_milliseconds has + # a very high value the parsing will fail. + column = "#{query.model.quoted_table_name}.duration_in_milliseconds / 1000.0 || ' seconds'" + Arel::Nodes::NamedFunction.new('CAST', [Arel.sql("(#{column}) AS interval")]) end end end diff --git a/spec/factories/analytics/cycle_analytics/issue_stage_events.rb b/spec/factories/analytics/cycle_analytics/issue_stage_events.rb index 8ad8815261136..48754abe7a2ca 100644 --- a/spec/factories/analytics/cycle_analytics/issue_stage_events.rb +++ b/spec/factories/analytics/cycle_analytics/issue_stage_events.rb @@ -9,5 +9,10 @@ start_event_timestamp { 3.weeks.ago.to_date } end_event_timestamp { 2.weeks.ago.to_date } + duration_in_milliseconds do + if start_event_timestamp && end_event_timestamp + (end_event_timestamp.to_time - start_event_timestamp.to_time).in_milliseconds + end + end end end diff --git a/spec/factories/analytics/cycle_analytics/merge_request_stage_events.rb b/spec/factories/analytics/cycle_analytics/merge_request_stage_events.rb index d8fa43b024f82..1e532e76ace44 100644 --- a/spec/factories/analytics/cycle_analytics/merge_request_stage_events.rb +++ b/spec/factories/analytics/cycle_analytics/merge_request_stage_events.rb @@ -9,5 +9,10 @@ start_event_timestamp { 3.weeks.ago.to_date } end_event_timestamp { 2.weeks.ago.to_date } + duration_in_milliseconds do + if start_event_timestamp && end_event_timestamp + (end_event_timestamp.to_time - start_event_timestamp.to_time).in_milliseconds + end + end end end diff --git a/spec/support/shared_examples/models/concerns/analytics/cycle_analytics/stage_event_model_examples.rb b/spec/support/shared_examples/models/concerns/analytics/cycle_analytics/stage_event_model_examples.rb index 13a7f71fdb270..f10c2c5d12120 100644 --- a/spec/support/shared_examples/models/concerns/analytics/cycle_analytics/stage_event_model_examples.rb +++ b/spec/support/shared_examples/models/concerns/analytics/cycle_analytics/stage_event_model_examples.rb @@ -184,19 +184,9 @@ def attributes(array) describe '#total_time' do it 'calcualtes total time from the start_event_timestamp and end_event_timestamp columns' do - model = described_class.new(start_event_timestamp: Time.new(2022, 1, 1, 12, 5, 0), end_event_timestamp: Time.new(2022, 1, 1, 12, 6, 30)) + model = build(stage_event_factory, start_event_timestamp: Time.new(2022, 1, 1, 12, 5, 0), end_event_timestamp: Time.new(2022, 1, 1, 12, 6, 30)) expect(model.total_time).to eq(90) end - - context 'when total time is calculated in SQL as an extra column' do - it 'returns the SQL calculated time' do - create(stage_event_factory) # rubocop:disable Rails/SaveBang - - model = described_class.select('*, 5 AS total_time').first - - expect(model.total_time).to eq(5) - end - end end end -- GitLab