From fbc012b3c76eb45b62d5b0cdd08773c637e11d11 Mon Sep 17 00:00:00 2001 From: Jonas Larsen <jlarsen@gitlab.com> Date: Mon, 19 Feb 2024 08:27:19 +0000 Subject: [PATCH] Remove "AND" operator - Intersection class --- .rubocop_todo/layout/line_length.yml | 2 - .rubocop_todo/rspec/feature_category.yml | 1 - .../metrics/aggregates/aggregate_spec.rb | 4 +- lib/gitlab/usage/metrics/aggregates.rb | 3 +- .../usage/metrics/aggregates/aggregate.rb | 7 +- .../sources/calculations/intersection.rb | 74 --------------- .../metrics/aggregates/aggregate_spec.rb | 12 +-- .../sources/calculations/intersection_spec.rb | 90 ------------------- spec/support/rspec_order_todo.yml | 1 - 9 files changed, 7 insertions(+), 187 deletions(-) delete mode 100644 lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb delete mode 100644 spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb diff --git a/.rubocop_todo/layout/line_length.yml b/.rubocop_todo/layout/line_length.yml index 3c9b4a6acb6d8..898088c9021f4 100644 --- a/.rubocop_todo/layout/line_length.yml +++ b/.rubocop_todo/layout/line_length.yml @@ -2666,7 +2666,6 @@ Layout/LineLength: - 'lib/gitlab/tracking/event_definition.rb' - 'lib/gitlab/usage/metric_definition.rb' - 'lib/gitlab/usage/metrics/aggregates/aggregate.rb' - - 'lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb' - 'lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb' - 'lib/gitlab/usage/service_ping_report.rb' - 'lib/gitlab/usage_data.rb' @@ -3794,7 +3793,6 @@ Layout/LineLength: - 'spec/lib/gitlab/usage/metric_definition_spec.rb' - 'spec/lib/gitlab/usage/metric_spec.rb' - 'spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb' - - 'spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb' - 'spec/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll_spec.rb' - 'spec/lib/gitlab/usage/metrics/aggregates/sources/redis_hll_spec.rb' - 'spec/lib/gitlab/usage/metrics/instrumentations/count_users_creating_issues_metric_spec.rb' diff --git a/.rubocop_todo/rspec/feature_category.yml b/.rubocop_todo/rspec/feature_category.yml index daf099b379766..eed1d8573ebb5 100644 --- a/.rubocop_todo/rspec/feature_category.yml +++ b/.rubocop_todo/rspec/feature_category.yml @@ -3972,7 +3972,6 @@ RSpec/FeatureCategory: - 'spec/lib/gitlab/usage/metric_definition_spec.rb' - 'spec/lib/gitlab/usage/metric_spec.rb' - 'spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb' - - 'spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb' - 'spec/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll_spec.rb' - 'spec/lib/gitlab/usage/metrics/aggregates/sources/redis_hll_spec.rb' - 'spec/lib/gitlab/usage/metrics/instrumentations/active_user_count_metric_spec.rb' diff --git a/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb b/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb index e7692c6803ff9..f20db975af720 100644 --- a/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb +++ b/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb @@ -17,7 +17,7 @@ let(:aggregate) do { source: datasource, - operator: 'AND', + operator: 'OR', events: events } end @@ -39,7 +39,7 @@ it 'returns the number of unique events for aggregation', :aggregate_failures do expect(namespace::SOURCES[datasource]) - .to receive(:calculate_metrics_intersections) + .to receive(:calculate_metrics_union) .with(params.merge(metric_names: events, property_name: nil)) .and_return(5) expect(calculate_count_for_aggregation).to eq(5) diff --git a/lib/gitlab/usage/metrics/aggregates.rb b/lib/gitlab/usage/metrics/aggregates.rb index 0edd9f7914acd..a3410cbc39351 100644 --- a/lib/gitlab/usage/metrics/aggregates.rb +++ b/lib/gitlab/usage/metrics/aggregates.rb @@ -5,8 +5,7 @@ module Usage module Metrics module Aggregates UNION_OF_AGGREGATED_METRICS = 'OR' - INTERSECTION_OF_AGGREGATED_METRICS = 'AND' - ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze + ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS].freeze AggregatedMetricError = Class.new(StandardError) UnknownAggregationOperator = Class.new(AggregatedMetricError) UnknownAggregationSource = Class.new(AggregatedMetricError) diff --git a/lib/gitlab/usage/metrics/aggregates/aggregate.rb b/lib/gitlab/usage/metrics/aggregates/aggregate.rb index 19fc3a90be3c0..079fb39851cc3 100644 --- a/lib/gitlab/usage/metrics/aggregates/aggregate.rb +++ b/lib/gitlab/usage/metrics/aggregates/aggregate.rb @@ -17,11 +17,8 @@ def calculate_count_for_aggregation(aggregation:, time_frame:) events = select_defined_events(aggregation[:events], aggregation[:source]) property_name = aggregation[:attribute] - if aggregation[:operator] == UNION_OF_AGGREGATED_METRICS - source.calculate_metrics_union(**time_constraints(time_frame).merge(metric_names: events, property_name: property_name, recorded_at: recorded_at)) - else - source.calculate_metrics_intersections(**time_constraints(time_frame).merge(metric_names: events, property_name: property_name, recorded_at: recorded_at)) - end + source.calculate_metrics_union(**time_constraints(time_frame) + .merge(metric_names: events, property_name: property_name, recorded_at: recorded_at)) end rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError, AggregatedMetricError => error failure(error) diff --git a/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb b/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb deleted file mode 100644 index 7c9561cd06d4a..0000000000000 --- a/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb +++ /dev/null @@ -1,74 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Usage - module Metrics - module Aggregates - module Sources - module Calculations - module Intersection - def calculate_metrics_intersections(metric_names:, start_date:, end_date:, recorded_at:, property_name:, subset_powers_cache: Hash.new({})) - # calculate power of intersection of all given metrics from inclusion exclusion principle - # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) => - # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C| - # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| => - # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D| - - # calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... - subset_powers_data = subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, property_name, subset_powers_cache) - - # calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D| - power_of_union_of_all_metrics = subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||= \ - calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name) - - # in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate, - # is positive or negative in particular equation we need to determine if number of subsets is even or odd. Please take a look at two examples below - # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + |A & B & C| => - # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C| - # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| => - # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D| - subset_powers_size_even = subset_powers_data.size.even? - - # sum all components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... => - sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even) - - # add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D| - sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_metrics : -power_of_union_of_all_metrics) - end - - private - - def subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, property_name, subset_powers_cache) - subset_sizes = (1...metric_names.size) - - subset_sizes.map do |subset_size| - if subset_size > 1 - # calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|) - metric_names.combination(subset_size).sum do |metrics_subset| - subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||= - calculate_metrics_intersections(metric_names: metrics_subset, start_date: start_date, end_date: end_date, recorded_at: recorded_at, subset_powers_cache: subset_powers_cache, property_name: property_name) - end - else - # calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ... - metric_names.sum do |metric| - subset_powers_cache[subset_size][metric] ||= \ - calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name) - end - end - end - end - - def sum_subset_powers(subset_powers_data, subset_powers_size_even) - sum_without_sign = subset_powers_data.to_enum.with_index.sum do |value, index| - (index + 1).odd? ? value : -value - end - - (subset_powers_size_even ? -1 : 1) * sum_without_sign - end - end - end - end - end - end - end -end diff --git a/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb b/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb index 9dd91feb0f0cc..edace79ad7a82 100644 --- a/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb +++ b/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb @@ -20,16 +20,10 @@ context 'with valid configuration' do where(:number_of_days, :operator, :datasource, :attribute, :expected_method) do - 28 | 'AND' | 'redis_hll' | 'user.id' | :calculate_metrics_intersections - 7 | 'AND' | 'redis_hll' | 'user.id' | :calculate_metrics_intersections - 28 | 'AND' | 'database' | 'project.id' | :calculate_metrics_intersections - 7 | 'AND' | 'database' | 'user.id' | :calculate_metrics_intersections 28 | 'OR' | 'redis_hll' | 'user.id' | :calculate_metrics_union 7 | 'OR' | 'redis_hll' | 'project.id' | :calculate_metrics_union 28 | 'OR' | 'database' | 'user.id' | :calculate_metrics_union 7 | 'OR' | 'database' | 'user.id' | :calculate_metrics_union - 28 | 'AND' | 'internal_events' | 'user.id' | :calculate_metrics_intersections - 7 | 'AND' | 'internal_events' | 'project.id' | :calculate_metrics_intersections 28 | 'OR' | 'internal_events' | 'user.id' | :calculate_metrics_union 7 | 'OR' | 'internal_events' | 'user.id' | :calculate_metrics_union end @@ -73,8 +67,6 @@ end where(:operator, :datasource, :expected_method, :expected_events) do - 'AND' | 'redis_hll' | :calculate_metrics_intersections | %w[event1 event2] - 'AND' | 'database' | :calculate_metrics_intersections | %w[event1 event2 event3] 'OR' | 'redis_hll' | :calculate_metrics_union | %w[event1 event2] 'OR' | 'database' | :calculate_metrics_union | %w[event1 event2 event3] end @@ -110,8 +102,8 @@ context 'with invalid configuration' do where(:time_frame, :operator, :datasource, :expected_error) do '28d' | 'SUM' | 'redis_hll' | namespace::UnknownAggregationOperator - '7d' | 'AND' | 'mongodb' | namespace::UnknownAggregationSource - 'all' | 'AND' | 'redis_hll' | namespace::DisallowedAggregationTimeFrame + '7d' | 'OR' | 'mongodb' | namespace::UnknownAggregationSource + 'all' | 'OR' | 'redis_hll' | namespace::DisallowedAggregationTimeFrame end with_them do diff --git a/spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb b/spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb deleted file mode 100644 index 258b72cadfe24..0000000000000 --- a/spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb +++ /dev/null @@ -1,90 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe Gitlab::Usage::Metrics::Aggregates::Sources::Calculations::Intersection do - let_it_be(:recorded_at) { Time.current.to_i } - let_it_be(:start_date) { 4.weeks.ago.to_date } - let_it_be(:end_date) { Date.current } - let_it_be(:property_name) { 'property1' } - - shared_examples 'aggregated_metrics_data with source' do - context 'with AND operator' do - let(:params) { { start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name } } - - context 'with even number of metrics' do - it 'calculates intersection correctly', :aggregate_failures do - # gmau_1 data is as follow - # |A| => 4 - expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event3')).and_return(4) - # |B| => 6 - expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event5')).and_return(6) - # |A + B| => 8 - expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event3 event5])).and_return(8) - # Exclusion inclusion principle formula to calculate intersection of 2 sets - # |A & B| = (|A| + |B|) - |A + B| => (4 + 6) - 8 => 2 - expect(source.calculate_metrics_intersections(metric_names: %w[event3 event5], start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name)).to eq(2) - end - end - - context 'with odd number of metrics' do - it 'calculates intersection correctly', :aggregate_failures do - # gmau_2 data is as follow: - # |A| => 2 - expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event1')).and_return(2) - # |B| => 3 - expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event2')).and_return(3) - # |C| => 5 - expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event3')).and_return(5) - - # |A + B| => 4 therefore |A & B| = (|A| + |B|) - |A + B| => 2 + 3 - 4 => 1 - expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2])).and_return(4) - # |A + C| => 6 therefore |A & C| = (|A| + |C|) - |A + C| => 2 + 5 - 6 => 1 - expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event3])).and_return(6) - # |B + C| => 7 therefore |B & C| = (|B| + |C|) - |B + C| => 3 + 5 - 7 => 1 - expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event2 event3])).and_return(7) - # |A + B + C| => 8 - expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2 event3])).and_return(8) - # Exclusion inclusion principle formula to calculate intersection of 3 sets - # |A & B & C| = (|A & B| + |A & C| + |B & C|) - (|A| + |B| + |C|) + |A + B + C| - # (1 + 1 + 1) - (2 + 3 + 5) + 8 => 1 - expect(source.calculate_metrics_intersections(metric_names: %w[event1 event2 event3], start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name)).to eq(1) - end - end - end - end - - describe '.aggregated_metrics_data' do - let(:source) do - Class.new do - extend Gitlab::Usage::Metrics::Aggregates::Sources::Calculations::Intersection - end - end - - it 'caches intermediate operations', :aggregate_failures do - events = %w[event1 event2 event3 event5] - - params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name } - - events.each do |event| - expect(source).to receive(:calculate_metrics_union) - .with(params.merge(metric_names: event)) - .once - .and_return(0) - end - - 2.upto(4) do |subset_size| - events.combination(subset_size).each do |events| - expect(source).to receive(:calculate_metrics_union) - .with(params.merge(metric_names: events)) - .once - .and_return(0) - end - end - - expect(source.calculate_metrics_intersections(metric_names: events, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name)).to eq(0) - end - - it_behaves_like 'aggregated_metrics_data with source' - end -end diff --git a/spec/support/rspec_order_todo.yml b/spec/support/rspec_order_todo.yml index c2ca175745212..b97c3755e6f22 100644 --- a/spec/support/rspec_order_todo.yml +++ b/spec/support/rspec_order_todo.yml @@ -6735,7 +6735,6 @@ - './spec/lib/gitlab/usage_data/topology_spec.rb' - './spec/lib/gitlab/usage/metric_definition_spec.rb' - './spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb' -- './spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb' - './spec/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll_spec.rb' - './spec/lib/gitlab/usage/metrics/aggregates/sources/redis_hll_spec.rb' - './spec/lib/gitlab/usage/metrics/instrumentations/active_user_count_metric_spec.rb' -- GitLab