diff --git a/config/metrics/schema/redis_hll.json b/config/metrics/schema/redis_hll.json index 255a4f89b9008e522af203e9a2d10860831de11c..a70c0f3a8a7c048f944b5c11f76eaabd9916ef32 100644 --- a/config/metrics/schema/redis_hll.json +++ b/config/metrics/schema/redis_hll.json @@ -57,7 +57,10 @@ ] }, "attribute": { - "type": "string" + "enum": [ + "user_id", + "project_id" + ] } }, "required": [ diff --git a/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb b/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb index edba7f215d842901236b3b4ba24bbd7b3298f17b..e7692c6803ff986bc9b4a97b45a9b33afb1db925 100644 --- a/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb +++ b/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb @@ -40,7 +40,7 @@ it 'returns the number of unique events for aggregation', :aggregate_failures do expect(namespace::SOURCES[datasource]) .to receive(:calculate_metrics_intersections) - .with(params.merge(metric_names: events)) + .with(params.merge(metric_names: events, property_name: nil)) .and_return(5) expect(calculate_count_for_aggregation).to eq(5) end diff --git a/lib/gitlab/usage/metrics/aggregates/aggregate.rb b/lib/gitlab/usage/metrics/aggregates/aggregate.rb index a0a58534661faa5e72f484e4c5f8fd6cd53653ff..19fc3a90be3c041a99aea645c658a887b2b704f0 100644 --- a/lib/gitlab/usage/metrics/aggregates/aggregate.rb +++ b/lib/gitlab/usage/metrics/aggregates/aggregate.rb @@ -15,11 +15,12 @@ def calculate_count_for_aggregation(aggregation:, time_frame:) with_validate_configuration(aggregation, time_frame) do source = SOURCES[aggregation[:source]] events = select_defined_events(aggregation[:events], aggregation[:source]) + property_name = aggregation[:attribute] if aggregation[:operator] == UNION_OF_AGGREGATED_METRICS - source.calculate_metrics_union(**time_constraints(time_frame).merge(metric_names: events, recorded_at: recorded_at)) + source.calculate_metrics_union(**time_constraints(time_frame).merge(metric_names: events, property_name: property_name, recorded_at: recorded_at)) else - source.calculate_metrics_intersections(**time_constraints(time_frame).merge(metric_names: events, recorded_at: recorded_at)) + source.calculate_metrics_intersections(**time_constraints(time_frame).merge(metric_names: events, property_name: property_name, recorded_at: recorded_at)) end end rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError, AggregatedMetricError => error diff --git a/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb b/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb index c8c248905f7f6fa39b91c2ae110dc330fbfc86d7..7c9561cd06d4ae20f2430496fb51fcbf4263c483 100644 --- a/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb +++ b/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb @@ -7,7 +7,7 @@ module Aggregates module Sources module Calculations module Intersection - def calculate_metrics_intersections(metric_names:, start_date:, end_date:, recorded_at:, subset_powers_cache: Hash.new({})) + def calculate_metrics_intersections(metric_names:, start_date:, end_date:, recorded_at:, property_name:, subset_powers_cache: Hash.new({})) # calculate power of intersection of all given metrics from inclusion exclusion principle # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) => # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C| @@ -15,11 +15,11 @@ def calculate_metrics_intersections(metric_names:, start_date:, end_date:, recor # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D| # calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... - subset_powers_data = subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, subset_powers_cache) + subset_powers_data = subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, property_name, subset_powers_cache) # calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D| power_of_union_of_all_metrics = subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||= \ - calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at) + calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name) # in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate, # is positive or negative in particular equation we need to determine if number of subsets is even or odd. Please take a look at two examples below @@ -38,7 +38,7 @@ def calculate_metrics_intersections(metric_names:, start_date:, end_date:, recor private - def subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, subset_powers_cache) + def subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, property_name, subset_powers_cache) subset_sizes = (1...metric_names.size) subset_sizes.map do |subset_size| @@ -46,13 +46,13 @@ def subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, # calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|) metric_names.combination(subset_size).sum do |metrics_subset| subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||= - calculate_metrics_intersections(metric_names: metrics_subset, start_date: start_date, end_date: end_date, recorded_at: recorded_at, subset_powers_cache: subset_powers_cache) + calculate_metrics_intersections(metric_names: metrics_subset, start_date: start_date, end_date: end_date, recorded_at: recorded_at, subset_powers_cache: subset_powers_cache, property_name: property_name) end else # calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ... metric_names.sum do |metric| subset_powers_cache[subset_size][metric] ||= \ - calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at) + calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name) end end end diff --git a/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb b/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb index eccf79b9703240bd536788f94629a391f5f81f93..88677f6a90c755e02fb18b53a5f4c14ec18c91ac 100644 --- a/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb +++ b/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb @@ -8,7 +8,7 @@ module Sources class PostgresHll extend Calculations::Intersection class << self - def calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at:) + def calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at:, property_name:) # rubocop:disable Lint/UnusedMethodArgument -- property_name is used only for RedisHLL source time_period = start_date && end_date ? (start_date..end_date) : nil Array(metric_names).each_with_object(Gitlab::Database::PostgresHll::Buckets.new) do |event, buckets| diff --git a/lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb b/lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb index 1bdf3a7f9d889b6b673e2ea15c73bfd9f22a2769..d60396152a9a1c53fcf81f0b622279912f30f83d 100644 --- a/lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb +++ b/lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb @@ -7,9 +7,14 @@ module Aggregates module Sources class RedisHll extend Calculations::Intersection - def self.calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at: nil) - union = Gitlab::UsageDataCounters::HLLRedisCounter - .calculate_events_union(event_names: metric_names, start_date: start_date, end_date: end_date) + + def self.calculate_metrics_union(metric_names:, start_date:, end_date:, property_name:, recorded_at: nil) + union = Gitlab::UsageDataCounters::HLLRedisCounter.calculate_events_union( + event_names: metric_names, + property_name: property_name, + start_date: start_date, + end_date: end_date + ) return union if union >= 0 diff --git a/lib/gitlab/usage/metrics/instrumentations/aggregated_metric.rb b/lib/gitlab/usage/metrics/instrumentations/aggregated_metric.rb index de8726f71b5c43ec6b214286f8204075c0490248..15eb12b05df055daf64965b08e4f4d9a75afd787 100644 --- a/lib/gitlab/usage/metrics/instrumentations/aggregated_metric.rb +++ b/lib/gitlab/usage/metrics/instrumentations/aggregated_metric.rb @@ -22,6 +22,13 @@ module Instrumentations class AggregatedMetric < BaseMetric FALLBACK = -1 + # This map is necessary because AggregatedMetrics' `:attribute` format + # is different from internal_events `events.unique` format. + # To be fixed in https://gitlab.com/gitlab-org/gitlab/-/issues/439609 + ALLOWED_ATTRIBUTES_MAP = { + 'user_id' => 'user.id', + 'project_id' => 'project.id' + }.freeze def initialize(metric_definition) super @@ -48,9 +55,19 @@ def aggregate_config { source: source, events: options[:events], - operator: aggregate[:operator] + operator: aggregate[:operator], + attribute: attribute } end + + def attribute + config_attribute = aggregate[:attribute] + + return ALLOWED_ATTRIBUTES_MAP[config_attribute] if ALLOWED_ATTRIBUTES_MAP.key?(config_attribute) + + info = "#{self.class.name} only accepts the following aggregate.attributes: #{ALLOWED_ATTRIBUTES_MAP.keys}" + raise Gitlab::Usage::MetricDefinition::InvalidError, info + end end end end diff --git a/lib/gitlab/usage/metrics/instrumentations/total_count_metric.rb b/lib/gitlab/usage/metrics/instrumentations/total_count_metric.rb index 0e07a5b148f6f639e75d156973f81cfaeee2bc85..9bf54be190586f2bba92946aa63ed668e0167a53 100644 --- a/lib/gitlab/usage/metrics/instrumentations/total_count_metric.rb +++ b/lib/gitlab/usage/metrics/instrumentations/total_count_metric.rb @@ -18,7 +18,7 @@ class TotalCountMetric < BaseMetric KEY_PREFIX = "{event_counters}_" - def self.redis_key(event_name, date = nil, _skip_file_list_validation = false) + def self.redis_key(event_name, date = nil, _used_in_aggregate_metric = false) base_key = KEY_PREFIX + event_name return base_key unless date diff --git a/lib/gitlab/usage/time_series_storable.rb b/lib/gitlab/usage/time_series_storable.rb index 4969573161b85bc18e4d9c5cc9991c958382592b..2bc74d39c097df96557611d641574c9909ae74e9 100644 --- a/lib/gitlab/usage/time_series_storable.rb +++ b/lib/gitlab/usage/time_series_storable.rb @@ -3,13 +3,13 @@ module Gitlab module Usage module TimeSeriesStorable - # requires a #redis_key(event, date, skip_file_list_validation) method to be defined - def keys_for_aggregation(events:, start_date:, end_date:, skip_file_list_validation: false) + # requires a #redis_key(event, date, used_in_aggregate_metric) method to be defined + def keys_for_aggregation(events:, start_date:, end_date:, used_in_aggregate_metric: false) # we always keep 1 week of margin # .end_of_week is necessary to make sure this works for 1 week long periods too end_date = end_date.end_of_week - 1.week (start_date.to_date..end_date.to_date).flat_map do |date| - events.map { |event| redis_key(event, date, skip_file_list_validation) } + events.map { |event| redis_key(event, date, used_in_aggregate_metric) } end.uniq end diff --git a/lib/gitlab/usage_data_counters/hll_redis_counter.rb b/lib/gitlab/usage_data_counters/hll_redis_counter.rb index 21df6d9fc5c9402e2e84d525d3d72ecf4947a1fe..3d08f69ec6b06c1f591d6ffcb2f8306eba4dcb9c 100644 --- a/lib/gitlab/usage_data_counters/hll_redis_counter.rb +++ b/lib/gitlab/usage_data_counters/hll_redis_counter.rb @@ -67,10 +67,10 @@ def known_events @known_events ||= load_events end - def calculate_events_union(event_names:, start_date:, end_date:) - # :skip_file_list_validation is needed because this method is + def calculate_events_union(event_names:, property_name:, start_date:, end_date:) + # :used_in_aggregate_metric is needed because this method is # used by AggregatedMetrics, which sends :property_name even for legacy events - count_unique_events(event_names: event_names, property_name: nil, start_date: start_date, end_date: end_date, skip_file_list_validation: true) + count_unique_events(event_names: event_names, property_name: property_name, start_date: start_date, end_date: end_date, used_in_aggregate_metric: true) end private @@ -90,10 +90,10 @@ def track(values, event_name, property_name:, time: Time.zone.now) Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) end - def count_unique_events(event_names:, start_date:, end_date:, property_name:, skip_file_list_validation: false) + def count_unique_events(event_names:, start_date:, end_date:, property_name:, used_in_aggregate_metric: false) events = events_with_property_names(event_names, property_name) - keys = keys_for_aggregation(events: events, start_date: start_date, end_date: end_date, skip_file_list_validation: skip_file_list_validation) + keys = keys_for_aggregation(events: events, start_date: start_date, end_date: end_date, used_in_aggregate_metric: used_in_aggregate_metric) return FALLBACK unless keys.any? @@ -133,31 +133,32 @@ def event_for(event_name) known_events.find { |event| event[:name] == event_name.to_s } end - def redis_key(event, time, skip_file_list_validation) - key = redis_key_base(event, skip_file_list_validation) + def redis_key(event, time, used_in_aggregate_metric) + key = redis_key_base(event, used_in_aggregate_metric) year_week = time.strftime('%G-%V') "{#{REDIS_SLOT}}_#{key}-#{year_week}" end - def redis_key_base(event, skip_file_list_validation) + def redis_key_base(event, used_in_aggregate_metric) event_name = event[:name] property_name = event[:property_name] - validate!(event_name, property_name, skip_file_list_validation) + validate!(event_name, property_name, used_in_aggregate_metric) key = event_name - if Feature.enabled?(:redis_hll_property_name_tracking, type: :wip) && property_name + legacy_event_with_property_name = used_in_aggregate_metric && legacy_events.include?(event_name) + if Feature.enabled?(:redis_hll_property_name_tracking, type: :wip) && property_name && !legacy_event_with_property_name key = "#{key}-#{formatted_property_name(property_name)}" end key_overrides.fetch(key, key) end - def validate!(event_name, property_name, skip_file_list_validation) + def validate!(event_name, property_name, used_in_aggregate_metric) raise UnknownEvent, "Unknown event #{event_name}" unless known_events_names.include?(event_name.to_s) - return if skip_file_list_validation + return if used_in_aggregate_metric if property_name && legacy_events.include?(event_name) link = Rails.application.routes.url_helpers.help_page_url("ee/development/internal_analytics/internal_event_instrumentation/migration.html#backend-1") diff --git a/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb b/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb index c3060cd4927489a44ef4a16f84b254aa8ca47c4a..9dd91feb0f0cc2aa401182d6181e634073e2fa66 100644 --- a/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb +++ b/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb @@ -19,19 +19,19 @@ end context 'with valid configuration' do - where(:number_of_days, :operator, :datasource, :expected_method) do - 28 | 'AND' | 'redis_hll' | :calculate_metrics_intersections - 7 | 'AND' | 'redis_hll' | :calculate_metrics_intersections - 28 | 'AND' | 'database' | :calculate_metrics_intersections - 7 | 'AND' | 'database' | :calculate_metrics_intersections - 28 | 'OR' | 'redis_hll' | :calculate_metrics_union - 7 | 'OR' | 'redis_hll' | :calculate_metrics_union - 28 | 'OR' | 'database' | :calculate_metrics_union - 7 | 'OR' | 'database' | :calculate_metrics_union - 28 | 'AND' | 'internal_events' | :calculate_metrics_intersections - 7 | 'AND' | 'internal_events' | :calculate_metrics_intersections - 28 | 'OR' | 'internal_events' | :calculate_metrics_union - 7 | 'OR' | 'internal_events' | :calculate_metrics_union + where(:number_of_days, :operator, :datasource, :attribute, :expected_method) do + 28 | 'AND' | 'redis_hll' | 'user.id' | :calculate_metrics_intersections + 7 | 'AND' | 'redis_hll' | 'user.id' | :calculate_metrics_intersections + 28 | 'AND' | 'database' | 'project.id' | :calculate_metrics_intersections + 7 | 'AND' | 'database' | 'user.id' | :calculate_metrics_intersections + 28 | 'OR' | 'redis_hll' | 'user.id' | :calculate_metrics_union + 7 | 'OR' | 'redis_hll' | 'project.id' | :calculate_metrics_union + 28 | 'OR' | 'database' | 'user.id' | :calculate_metrics_union + 7 | 'OR' | 'database' | 'user.id' | :calculate_metrics_union + 28 | 'AND' | 'internal_events' | 'user.id' | :calculate_metrics_intersections + 7 | 'AND' | 'internal_events' | 'project.id' | :calculate_metrics_intersections + 28 | 'OR' | 'internal_events' | 'user.id' | :calculate_metrics_union + 7 | 'OR' | 'internal_events' | 'user.id' | :calculate_metrics_union end with_them do @@ -42,7 +42,8 @@ { source: datasource, operator: operator, - events: %w[event1 event2] + events: %w[event1 event2], + attribute: attribute } end @@ -55,7 +56,7 @@ it 'returns the number of unique events for aggregation', :aggregate_failures do expect(namespace::SOURCES[datasource]) .to receive(expected_method) - .with(params.merge(metric_names: %w[event1 event2])) + .with(params.merge(metric_names: %w[event1 event2], property_name: attribute)) .and_return(5) expect(calculate_count_for_aggregation).to eq(5) end @@ -99,7 +100,7 @@ it 'returns the number of unique events for aggregation', :aggregate_failures do expect(namespace::SOURCES[datasource]) .to receive(expected_method) - .with(params.merge(metric_names: expected_events)) + .with(params.merge(metric_names: expected_events, property_name: nil)) .and_return(5) expect(calculate_count_for_aggregation).to eq(5) end diff --git a/spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb b/spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb index 41cb445155ea57033788c44defb7f3b86832ea04..258b72cadfe24966f776d5efab51e71ab50f85bd 100644 --- a/spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb +++ b/spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb @@ -6,10 +6,11 @@ let_it_be(:recorded_at) { Time.current.to_i } let_it_be(:start_date) { 4.weeks.ago.to_date } let_it_be(:end_date) { Date.current } + let_it_be(:property_name) { 'property1' } shared_examples 'aggregated_metrics_data with source' do context 'with AND operator' do - let(:params) { { start_date: start_date, end_date: end_date, recorded_at: recorded_at } } + let(:params) { { start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name } } context 'with even number of metrics' do it 'calculates intersection correctly', :aggregate_failures do @@ -22,7 +23,7 @@ expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event3 event5])).and_return(8) # Exclusion inclusion principle formula to calculate intersection of 2 sets # |A & B| = (|A| + |B|) - |A + B| => (4 + 6) - 8 => 2 - expect(source.calculate_metrics_intersections(metric_names: %w[event3 event5], start_date: start_date, end_date: end_date, recorded_at: recorded_at)).to eq(2) + expect(source.calculate_metrics_intersections(metric_names: %w[event3 event5], start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name)).to eq(2) end end @@ -47,7 +48,7 @@ # Exclusion inclusion principle formula to calculate intersection of 3 sets # |A & B & C| = (|A & B| + |A & C| + |B & C|) - (|A| + |B| + |C|) + |A + B + C| # (1 + 1 + 1) - (2 + 3 + 5) + 8 => 1 - expect(source.calculate_metrics_intersections(metric_names: %w[event1 event2 event3], start_date: start_date, end_date: end_date, recorded_at: recorded_at)).to eq(1) + expect(source.calculate_metrics_intersections(metric_names: %w[event1 event2 event3], start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name)).to eq(1) end end end @@ -63,7 +64,7 @@ it 'caches intermediate operations', :aggregate_failures do events = %w[event1 event2 event3 event5] - params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at } + params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name } events.each do |event| expect(source).to receive(:calculate_metrics_union) @@ -81,7 +82,7 @@ end end - expect(source.calculate_metrics_intersections(metric_names: events, start_date: start_date, end_date: end_date, recorded_at: recorded_at)).to eq(0) + expect(source.calculate_metrics_intersections(metric_names: events, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name)).to eq(0) end it_behaves_like 'aggregated_metrics_data with source' diff --git a/spec/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll_spec.rb b/spec/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll_spec.rb index 18a97447f1c43dce65ee42ce521a35ff66c9daed..7ee32a8ee30ac23a87bf0c4f6d3022f584718985 100644 --- a/spec/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll_spec.rb +++ b/spec/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll_spec.rb @@ -7,6 +7,7 @@ let_it_be(:end_date) { Date.current } let_it_be(:recorded_at) { Time.current } let_it_be(:time_period) { { created_at: (start_date..end_date) } } + let_it_be(:property_name) { 'property1' } let(:metric_1) { 'metric_1' } let(:metric_2) { 'metric_2' } @@ -35,7 +36,7 @@ describe '.calculate_events_union' do subject(:calculate_metrics_union) do - described_class.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at) + described_class.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name) end it 'returns the number of unique events in the union of all metrics' do @@ -61,7 +62,7 @@ describe '.calculate_metrics_intersections' do subject(:calculate_metrics_intersections) do - described_class.calculate_metrics_intersections(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at) + described_class.calculate_metrics_intersections(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name) end it 'returns the number of common events in the intersection of all metrics' do diff --git a/spec/lib/gitlab/usage/metrics/aggregates/sources/redis_hll_spec.rb b/spec/lib/gitlab/usage/metrics/aggregates/sources/redis_hll_spec.rb index a137a97a978e47fd882a64f65019a690cff0f2bf..e499ff0bc70586b707f77802c26248c1a28bf283 100644 --- a/spec/lib/gitlab/usage/metrics/aggregates/sources/redis_hll_spec.rb +++ b/spec/lib/gitlab/usage/metrics/aggregates/sources/redis_hll_spec.rb @@ -7,15 +7,16 @@ let_it_be(:start_date) { 7.days.ago } let_it_be(:end_date) { Date.current } let_it_be(:recorded_at) { Time.current } + let_it_be(:property_name) { 'property_name' } describe '.calculate_events_union' do subject(:calculate_metrics_union) do - described_class.calculate_metrics_union(metric_names: event_names, start_date: start_date, end_date: end_date, recorded_at: nil) + described_class.calculate_metrics_union(metric_names: event_names, start_date: start_date, end_date: end_date, recorded_at: nil, property_name: property_name) end it 'calls Gitlab::UsageDataCounters::HLLRedisCounter.calculate_events_union' do expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union) - .with(event_names: event_names, start_date: start_date, end_date: end_date) + .with(event_names: event_names, start_date: start_date, end_date: end_date, property_name: property_name) .and_return(5) calculate_metrics_union @@ -30,18 +31,18 @@ describe '.calculate_metrics_intersections' do subject(:calculate_metrics_intersections) do - described_class.calculate_metrics_intersections(metric_names: event_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at) + described_class.calculate_metrics_intersections(metric_names: event_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name) end it 'uses values returned by union to compute the intersection' do event_names.each do |event| expect(Gitlab::Usage::Metrics::Aggregates::Sources::RedisHll).to receive(:calculate_metrics_union) - .with(metric_names: event, start_date: start_date, end_date: end_date, recorded_at: recorded_at) + .with(metric_names: event, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name) .and_return(5) end expect(described_class).to receive(:calculate_metrics_union) - .with(metric_names: event_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at) + .with(metric_names: event_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name) .and_return(2) expect(calculate_metrics_intersections).to eq(8) diff --git a/spec/lib/gitlab/usage/metrics/instrumentations/aggregated_metric_spec.rb b/spec/lib/gitlab/usage/metrics/instrumentations/aggregated_metric_spec.rb index f6b9da68184c9d362d48cff55c5c5a41ac8e7bc3..5cb08c1a3c40e10c3df615302e452c4d995eb6b2 100644 --- a/spec/lib/gitlab/usage/metrics/instrumentations/aggregated_metric_spec.rb +++ b/spec/lib/gitlab/usage/metrics/instrumentations/aggregated_metric_spec.rb @@ -5,43 +5,55 @@ RSpec.describe Gitlab::Usage::Metrics::Instrumentations::AggregatedMetric, :clean_gitlab_redis_shared_state, feature_category: :service_ping do using RSpec::Parameterized::TableSyntax + before do + stub_feature_flags(redis_hll_property_name_tracking: property_name_flag_enabled) + + redis_counter_class = Gitlab::UsageDataCounters::HLLRedisCounter + # weekly AND 1 weekly OR 2 - Gitlab::UsageDataCounters::HLLRedisCounter.track_event(:i_quickactions_approve, values: 1, time: 1.week.ago) - Gitlab::UsageDataCounters::HLLRedisCounter.track_event(:i_quickactions_unapprove, values: 1, time: 1.week.ago) - Gitlab::UsageDataCounters::HLLRedisCounter.track_event(:i_quickactions_unapprove, values: 2, time: 1.week.ago) + redis_counter_class.track_event(:g_edit_by_snippet_ide, values: 1, time: 1.week.ago, property_name: :user) + redis_counter_class.track_event(:g_edit_by_web_ide, values: 1, time: 1.week.ago, property_name: :user) + redis_counter_class.track_event(:g_edit_by_web_ide, values: 2, time: 1.week.ago, property_name: :user) # monthly AND 2 weekly OR 3 - Gitlab::UsageDataCounters::HLLRedisCounter.track_event(:i_quickactions_approve, values: 2, time: 2.weeks.ago) - Gitlab::UsageDataCounters::HLLRedisCounter.track_event(:i_quickactions_unapprove, values: 3, time: 2.weeks.ago) + redis_counter_class.track_event(:g_edit_by_snippet_ide, values: 2, time: 2.weeks.ago, property_name: :user) + redis_counter_class.track_event(:g_edit_by_web_ide, values: 3, time: 2.weeks.ago, property_name: :user) + + # different property_name + redis_counter_class.track_event(:g_edit_by_web_ide, values: 4, time: 1.week.ago, property_name: :project) # out of date range - Gitlab::UsageDataCounters::HLLRedisCounter.track_event(:i_quickactions_approve, values: 3, time: 2.months.ago) + redis_counter_class.track_event(:g_edit_by_snippet_ide, values: 3, time: 2.months.ago, property_name: :user) # database events Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll .save_aggregated_metrics( - metric_name: :i_quickactions_approve, + metric_name: :g_edit_by_snippet_ide, time_period: { created_at: (1.week.ago..Date.current) }, recorded_at_timestamp: Time.current, data: ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1) ) Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll .save_aggregated_metrics( - metric_name: :i_quickactions_unapprove, + metric_name: :g_edit_by_web_ide, time_period: { created_at: (1.week.ago..Date.current) }, recorded_at_timestamp: Time.current, data: ::Gitlab::Database::PostgresHll::Buckets.new(10 => 1, 56 => 1) ) end - where(:data_source, :time_frame, :operator, :expected_value) do - 'redis_hll' | '28d' | 'AND' | 2 - 'redis_hll' | '28d' | 'OR' | 3 - 'redis_hll' | '7d' | 'AND' | 1 - 'redis_hll' | '7d' | 'OR' | 2 - 'database' | '7d' | 'OR' | 3.0 - 'database' | '7d' | 'AND' | 1.0 + where(:data_source, :time_frame, :operator, :attribute, :expected_value, :property_name_flag_enabled) do + 'redis_hll' | '28d' | 'AND' | 'user_id' | 2 | true + 'redis_hll' | '28d' | 'OR' | 'user_id' | 3 | true + 'redis_hll' | '28d' | 'OR' | 'user_id' | 4 | false + 'redis_hll' | '28d' | 'OR' | 'project_id' | 4 | false + 'redis_hll' | '7d' | 'AND' | 'user_id' | 1 | true + 'redis_hll' | '7d' | 'OR' | 'user_id' | 2 | true + 'redis_hll' | '7d' | 'OR' | 'project_id' | 1 | true + 'redis_hll' | '7d' | 'AND' | 'project_id' | 0 | true + 'database' | '7d' | 'OR' | 'user_id' | 3.0 | true + 'database' | '7d' | 'AND' | 'user_id' | 1.0 | true end with_them do @@ -52,11 +64,12 @@ time_frame: time_frame, options: { aggregate: { - operator: operator + operator: operator, + attribute: attribute }, events: %w[ - i_quickactions_approve - i_quickactions_unapprove + g_edit_by_snippet_ide + g_edit_by_web_ide ] } } @@ -70,4 +83,29 @@ expect(described_class.new(metric_definition).value).to be_within(error_rate).percent_of(expected_value) end end + + context "with not allowed aggregate attribute" do + let(:property_name_flag_enabled) { true } + let(:metric_definition) do + { + data_source: 'redis_hll', + time_frame: '28d', + options: { + aggregate: { + operator: 'OR', + attribute: 'project.name' + }, + events: %w[ + g_edit_by_snippet_ide + g_edit_by_web_ide + ] + } + } + end + + it "raises an error" do + error_class = Gitlab::Usage::MetricDefinition::InvalidError + expect { described_class.new(metric_definition).value }.to raise_error(error_class) + end + end end diff --git a/spec/lib/gitlab/usage/time_series_storable_spec.rb b/spec/lib/gitlab/usage/time_series_storable_spec.rb index ff2c4ad6bc2911bb7ff095b242c4e3e6599d27cb..e534278accbaa0f2d704b2b0766dbf53fdb6da9c 100644 --- a/spec/lib/gitlab/usage/time_series_storable_spec.rb +++ b/spec/lib/gitlab/usage/time_series_storable_spec.rb @@ -7,9 +7,9 @@ Class.new do include Gitlab::Usage::TimeSeriesStorable - def redis_key(event, date, skip_file_list_validation) + def redis_key(event, date, used_in_aggregate_metric) key = apply_time_aggregation(event, date) - "#{key}:#{skip_file_list_validation}" + "#{key}:#{used_in_aggregate_metric}" end end end @@ -27,7 +27,7 @@ def redis_key(event, date, skip_file_list_validation) describe '#keys_for_aggregation' do let(:result) { counter_instance.keys_for_aggregation(**params) } - let(:params) { { events: events, start_date: start_date, end_date: end_date, skip_file_list_validation: true } } + let(:params) { { events: events, start_date: start_date, end_date: end_date, used_in_aggregate_metric: true } } let(:events) { %w[event1 event2] } let(:start_date) { Date.new(2023, 4, 1) } let(:end_date) { Date.new(2023, 4, 15) } diff --git a/spec/lib/gitlab/usage_data_counters/hll_redis_counter_spec.rb b/spec/lib/gitlab/usage_data_counters/hll_redis_counter_spec.rb index b787bf3baccd1e43ab75b967a316b8413e70adb5..270ee7330da9bee475ad063e7f5e9706dc98ef17 100644 --- a/spec/lib/gitlab/usage_data_counters/hll_redis_counter_spec.rb +++ b/spec/lib/gitlab/usage_data_counters/hll_redis_counter_spec.rb @@ -537,7 +537,8 @@ { name: 'g_project_management_epic_created' }, { name: 'g_project_management_epic_closed' }, { name: 'g_project_management_epic_reopened' }, - { name: 'g_project_management_epic_issue_added' } + { name: 'g_project_management_epic_issue_added' }, + { name: 'g_analytics_insights' } ].map(&:with_indifferent_access) end @@ -554,6 +555,10 @@ described_class.track_event('g_project_management_epic_closed', values: entity2, time: 3.days.ago, property_name: :user) described_class.track_event('g_project_management_epic_reopened', values: entity2, time: 3.days.ago, property_name: :user) + # legacy event + described_class.track_event('g_analytics_insights', values: entity1, time: 3.days.ago) + described_class.track_event('g_analytics_insights', values: entity2, time: 3.days.ago) + # events out of time scope described_class.track_event('g_project_management_epic_created', values: entity4, time: 8.days.ago, property_name: :user) @@ -563,13 +568,27 @@ end it 'calculates union of given events', :aggregate_failures do - expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[g_project_management_epic_issue_added]))).to eq 2 - expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[g_compliance_dashboard g_project_management_epic_created g_project_management_epic_closed]))).to eq 3 + expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[g_project_management_epic_issue_added], property_name: nil))).to eq 2 + expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[g_compliance_dashboard g_project_management_epic_created g_project_management_epic_closed], property_name: nil))).to eq 3 + end + + context "with property_name" do + it 'calculates union of given events', :aggregate_failures do + expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[g_project_management_epic_issue_added], property_name: 'user.id'))).to eq 2 + expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[g_compliance_dashboard g_project_management_epic_created g_project_management_epic_closed], property_name: 'user.id'))).to eq 3 + end + + context "with a legacy event" do + it 'ignores the property_name and uses original redis key for the event', :aggregate_failures do + expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[g_analytics_insights], property_name: 'user.id'))).to eq 2 + expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[g_analytics_insights], property_name: 'project.id'))).to eq 2 + end + end end - it 'returns 0 if there are no keys for given events' do + it 'returns fallback value if there are no keys for given events' do expect(Gitlab::Redis::HLL).not_to receive(:count) - expect(described_class.calculate_events_union(event_names: %w[g_compliance_dashboard g_project_management_epic_created g_project_management_epic_closed], start_date: Date.current, end_date: 4.weeks.ago)).to eq(-1) + expect(described_class.calculate_events_union(event_names: %w[g_compliance_dashboard g_project_management_epic_created g_project_management_epic_closed], start_date: Date.current, end_date: 4.weeks.ago, property_name: nil)).to eq(-1) end end