From fbc012b3c76eb45b62d5b0cdd08773c637e11d11 Mon Sep 17 00:00:00 2001
From: Jonas Larsen <jlarsen@gitlab.com>
Date: Mon, 19 Feb 2024 08:27:19 +0000
Subject: [PATCH] Remove "AND" operator - Intersection class

---
 .rubocop_todo/layout/line_length.yml          |  2 -
 .rubocop_todo/rspec/feature_category.yml      |  1 -
 .../metrics/aggregates/aggregate_spec.rb      |  4 +-
 lib/gitlab/usage/metrics/aggregates.rb        |  3 +-
 .../usage/metrics/aggregates/aggregate.rb     |  7 +-
 .../sources/calculations/intersection.rb      | 74 ---------------
 .../metrics/aggregates/aggregate_spec.rb      | 12 +--
 .../sources/calculations/intersection_spec.rb | 90 -------------------
 spec/support/rspec_order_todo.yml             |  1 -
 9 files changed, 7 insertions(+), 187 deletions(-)
 delete mode 100644 lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb
 delete mode 100644 spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb

diff --git a/.rubocop_todo/layout/line_length.yml b/.rubocop_todo/layout/line_length.yml
index 3c9b4a6acb6d8..898088c9021f4 100644
--- a/.rubocop_todo/layout/line_length.yml
+++ b/.rubocop_todo/layout/line_length.yml
@@ -2666,7 +2666,6 @@ Layout/LineLength:
     - 'lib/gitlab/tracking/event_definition.rb'
     - 'lib/gitlab/usage/metric_definition.rb'
     - 'lib/gitlab/usage/metrics/aggregates/aggregate.rb'
-    - 'lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb'
     - 'lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb'
     - 'lib/gitlab/usage/service_ping_report.rb'
     - 'lib/gitlab/usage_data.rb'
@@ -3794,7 +3793,6 @@ Layout/LineLength:
     - 'spec/lib/gitlab/usage/metric_definition_spec.rb'
     - 'spec/lib/gitlab/usage/metric_spec.rb'
     - 'spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb'
-    - 'spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb'
     - 'spec/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll_spec.rb'
     - 'spec/lib/gitlab/usage/metrics/aggregates/sources/redis_hll_spec.rb'
     - 'spec/lib/gitlab/usage/metrics/instrumentations/count_users_creating_issues_metric_spec.rb'
diff --git a/.rubocop_todo/rspec/feature_category.yml b/.rubocop_todo/rspec/feature_category.yml
index daf099b379766..eed1d8573ebb5 100644
--- a/.rubocop_todo/rspec/feature_category.yml
+++ b/.rubocop_todo/rspec/feature_category.yml
@@ -3972,7 +3972,6 @@ RSpec/FeatureCategory:
     - 'spec/lib/gitlab/usage/metric_definition_spec.rb'
     - 'spec/lib/gitlab/usage/metric_spec.rb'
     - 'spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb'
-    - 'spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb'
     - 'spec/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll_spec.rb'
     - 'spec/lib/gitlab/usage/metrics/aggregates/sources/redis_hll_spec.rb'
     - 'spec/lib/gitlab/usage/metrics/instrumentations/active_user_count_metric_spec.rb'
diff --git a/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb b/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb
index e7692c6803ff9..f20db975af720 100644
--- a/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb
+++ b/ee/spec/lib/ee/gitlab/usage/metrics/aggregates/aggregate_spec.rb
@@ -17,7 +17,7 @@
     let(:aggregate) do
       {
         source: datasource,
-        operator: 'AND',
+        operator: 'OR',
         events: events
       }
     end
@@ -39,7 +39,7 @@
 
       it 'returns the number of unique events for aggregation', :aggregate_failures do
         expect(namespace::SOURCES[datasource])
-          .to receive(:calculate_metrics_intersections)
+          .to receive(:calculate_metrics_union)
                 .with(params.merge(metric_names: events, property_name: nil))
                 .and_return(5)
         expect(calculate_count_for_aggregation).to eq(5)
diff --git a/lib/gitlab/usage/metrics/aggregates.rb b/lib/gitlab/usage/metrics/aggregates.rb
index 0edd9f7914acd..a3410cbc39351 100644
--- a/lib/gitlab/usage/metrics/aggregates.rb
+++ b/lib/gitlab/usage/metrics/aggregates.rb
@@ -5,8 +5,7 @@ module Usage
     module Metrics
       module Aggregates
         UNION_OF_AGGREGATED_METRICS = 'OR'
-        INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
-        ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
+        ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS].freeze
         AggregatedMetricError = Class.new(StandardError)
         UnknownAggregationOperator = Class.new(AggregatedMetricError)
         UnknownAggregationSource = Class.new(AggregatedMetricError)
diff --git a/lib/gitlab/usage/metrics/aggregates/aggregate.rb b/lib/gitlab/usage/metrics/aggregates/aggregate.rb
index 19fc3a90be3c0..079fb39851cc3 100644
--- a/lib/gitlab/usage/metrics/aggregates/aggregate.rb
+++ b/lib/gitlab/usage/metrics/aggregates/aggregate.rb
@@ -17,11 +17,8 @@ def calculate_count_for_aggregation(aggregation:, time_frame:)
               events = select_defined_events(aggregation[:events], aggregation[:source])
               property_name = aggregation[:attribute]
 
-              if aggregation[:operator] == UNION_OF_AGGREGATED_METRICS
-                source.calculate_metrics_union(**time_constraints(time_frame).merge(metric_names: events, property_name: property_name, recorded_at: recorded_at))
-              else
-                source.calculate_metrics_intersections(**time_constraints(time_frame).merge(metric_names: events, property_name: property_name, recorded_at: recorded_at))
-              end
+              source.calculate_metrics_union(**time_constraints(time_frame)
+                .merge(metric_names: events, property_name: property_name, recorded_at: recorded_at))
             end
           rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError, AggregatedMetricError => error
             failure(error)
diff --git a/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb b/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb
deleted file mode 100644
index 7c9561cd06d4a..0000000000000
--- a/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection.rb
+++ /dev/null
@@ -1,74 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
-  module Usage
-    module Metrics
-      module Aggregates
-        module Sources
-          module Calculations
-            module Intersection
-              def calculate_metrics_intersections(metric_names:, start_date:, end_date:, recorded_at:, property_name:, subset_powers_cache: Hash.new({}))
-                # calculate power of intersection of all given metrics from inclusion exclusion principle
-                # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|)  =>
-                # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
-                # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
-                # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
-
-                # calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) -  ...
-                subset_powers_data = subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, property_name, subset_powers_cache)
-
-                # calculate last component of the equation  |A & B & C & D| = .... - |A + B + C + D|
-                power_of_union_of_all_metrics = subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||= \
-                  calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name)
-
-                # in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate,
-                # is positive or negative in particular equation we need to determine if number of subsets is even or odd. Please take a look at two examples below
-                # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + |A & B & C|  =>
-                # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
-                # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
-                # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
-                subset_powers_size_even = subset_powers_data.size.even?
-
-                # sum all components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) -  ... =>
-                sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
-
-                # add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
-                sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_metrics : -power_of_union_of_all_metrics)
-              end
-
-              private
-
-              def subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, property_name, subset_powers_cache)
-                subset_sizes = (1...metric_names.size)
-
-                subset_sizes.map do |subset_size|
-                  if subset_size > 1
-                    # calculate sum of powers of intersection between each subset (with given size) of metrics:  #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
-                    metric_names.combination(subset_size).sum do |metrics_subset|
-                      subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||=
-                        calculate_metrics_intersections(metric_names: metrics_subset, start_date: start_date, end_date: end_date, recorded_at: recorded_at, subset_powers_cache: subset_powers_cache, property_name: property_name)
-                    end
-                  else
-                    # calculate sum of powers of each set (metric) alone  #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
-                    metric_names.sum do |metric|
-                      subset_powers_cache[subset_size][metric] ||= \
-                        calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name)
-                    end
-                  end
-                end
-              end
-
-              def sum_subset_powers(subset_powers_data, subset_powers_size_even)
-                sum_without_sign = subset_powers_data.to_enum.with_index.sum do |value, index|
-                  (index + 1).odd? ? value : -value
-                end
-
-                (subset_powers_size_even ? -1 : 1) * sum_without_sign
-              end
-            end
-          end
-        end
-      end
-    end
-  end
-end
diff --git a/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb b/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb
index 9dd91feb0f0cc..edace79ad7a82 100644
--- a/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb
+++ b/spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb
@@ -20,16 +20,10 @@
 
     context 'with valid configuration' do
       where(:number_of_days, :operator, :datasource, :attribute, :expected_method) do
-        28 | 'AND' | 'redis_hll'       | 'user.id'    | :calculate_metrics_intersections
-        7  | 'AND' | 'redis_hll'       | 'user.id'    | :calculate_metrics_intersections
-        28 | 'AND' | 'database'        | 'project.id' | :calculate_metrics_intersections
-        7  | 'AND' | 'database'        | 'user.id'    | :calculate_metrics_intersections
         28 | 'OR'  | 'redis_hll'       | 'user.id'    | :calculate_metrics_union
         7  | 'OR'  | 'redis_hll'       | 'project.id' | :calculate_metrics_union
         28 | 'OR'  | 'database'        | 'user.id'    | :calculate_metrics_union
         7  | 'OR'  | 'database'        | 'user.id'    | :calculate_metrics_union
-        28 | 'AND' | 'internal_events' | 'user.id'    | :calculate_metrics_intersections
-        7  | 'AND' | 'internal_events' | 'project.id' | :calculate_metrics_intersections
         28 | 'OR'  | 'internal_events' | 'user.id'    | :calculate_metrics_union
         7  | 'OR'  | 'internal_events' | 'user.id'    | :calculate_metrics_union
       end
@@ -73,8 +67,6 @@
       end
 
       where(:operator, :datasource, :expected_method, :expected_events) do
-        'AND' | 'redis_hll' | :calculate_metrics_intersections | %w[event1 event2]
-        'AND' | 'database'  | :calculate_metrics_intersections | %w[event1 event2 event3]
         'OR'  | 'redis_hll' | :calculate_metrics_union         | %w[event1 event2]
         'OR'  | 'database'  | :calculate_metrics_union         | %w[event1 event2 event3]
       end
@@ -110,8 +102,8 @@
     context 'with invalid configuration' do
       where(:time_frame, :operator, :datasource, :expected_error) do
         '28d' | 'SUM' | 'redis_hll' | namespace::UnknownAggregationOperator
-        '7d'  | 'AND' | 'mongodb'   | namespace::UnknownAggregationSource
-        'all' | 'AND' | 'redis_hll' | namespace::DisallowedAggregationTimeFrame
+        '7d'  | 'OR' | 'mongodb'   | namespace::UnknownAggregationSource
+        'all' | 'OR' | 'redis_hll' | namespace::DisallowedAggregationTimeFrame
       end
 
       with_them do
diff --git a/spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb b/spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb
deleted file mode 100644
index 258b72cadfe24..0000000000000
--- a/spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb
+++ /dev/null
@@ -1,90 +0,0 @@
-# frozen_string_literal: true
-
-require 'spec_helper'
-
-RSpec.describe Gitlab::Usage::Metrics::Aggregates::Sources::Calculations::Intersection do
-  let_it_be(:recorded_at) { Time.current.to_i }
-  let_it_be(:start_date) { 4.weeks.ago.to_date }
-  let_it_be(:end_date) { Date.current }
-  let_it_be(:property_name) { 'property1' }
-
-  shared_examples 'aggregated_metrics_data with source' do
-    context 'with AND operator' do
-      let(:params) { { start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name } }
-
-      context 'with even number of metrics' do
-        it 'calculates intersection correctly', :aggregate_failures do
-          # gmau_1 data is as follow
-          # |A| => 4
-          expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event3')).and_return(4)
-          # |B| => 6
-          expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event5')).and_return(6)
-          # |A + B| => 8
-          expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event3 event5])).and_return(8)
-          # Exclusion inclusion principle formula to calculate intersection of 2 sets
-          # |A & B| = (|A| + |B|) - |A + B| => (4 + 6) - 8 => 2
-          expect(source.calculate_metrics_intersections(metric_names: %w[event3 event5], start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name)).to eq(2)
-        end
-      end
-
-      context 'with odd number of metrics' do
-        it 'calculates intersection correctly', :aggregate_failures do
-          # gmau_2 data is as follow:
-          # |A| => 2
-          expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event1')).and_return(2)
-          # |B| => 3
-          expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event2')).and_return(3)
-          # |C| => 5
-          expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event3')).and_return(5)
-
-          # |A + B| => 4 therefore |A & B| = (|A| + |B|) - |A + B| =>  2 + 3 - 4 => 1
-          expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2])).and_return(4)
-          # |A + C| => 6 therefore |A & C| = (|A| + |C|) - |A + C| =>  2 + 5 - 6  => 1
-          expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event3])).and_return(6)
-          # |B + C| => 7 therefore |B & C| = (|B| + |C|) - |B + C| => 3 + 5 - 7 => 1
-          expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event2 event3])).and_return(7)
-          # |A + B + C| => 8
-          expect(source).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2 event3])).and_return(8)
-          # Exclusion inclusion principle formula to calculate intersection of 3 sets
-          # |A & B & C| = (|A & B| + |A & C| + |B & C|) - (|A| + |B| + |C|)  + |A + B + C|
-          # (1 + 1 + 1) - (2 + 3 + 5) + 8 => 1
-          expect(source.calculate_metrics_intersections(metric_names: %w[event1 event2 event3], start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name)).to eq(1)
-        end
-      end
-    end
-  end
-
-  describe '.aggregated_metrics_data' do
-    let(:source) do
-      Class.new do
-        extend Gitlab::Usage::Metrics::Aggregates::Sources::Calculations::Intersection
-      end
-    end
-
-    it 'caches intermediate operations', :aggregate_failures do
-      events = %w[event1 event2 event3 event5]
-
-      params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name }
-
-      events.each do |event|
-        expect(source).to receive(:calculate_metrics_union)
-                                       .with(params.merge(metric_names: event))
-                                       .once
-                                       .and_return(0)
-      end
-
-      2.upto(4) do |subset_size|
-        events.combination(subset_size).each do |events|
-          expect(source).to receive(:calculate_metrics_union)
-                                         .with(params.merge(metric_names: events))
-                                         .once
-                                         .and_return(0)
-        end
-      end
-
-      expect(source.calculate_metrics_intersections(metric_names: events, start_date: start_date, end_date: end_date, recorded_at: recorded_at, property_name: property_name)).to eq(0)
-    end
-
-    it_behaves_like 'aggregated_metrics_data with source'
-  end
-end
diff --git a/spec/support/rspec_order_todo.yml b/spec/support/rspec_order_todo.yml
index c2ca175745212..b97c3755e6f22 100644
--- a/spec/support/rspec_order_todo.yml
+++ b/spec/support/rspec_order_todo.yml
@@ -6735,7 +6735,6 @@
 - './spec/lib/gitlab/usage_data/topology_spec.rb'
 - './spec/lib/gitlab/usage/metric_definition_spec.rb'
 - './spec/lib/gitlab/usage/metrics/aggregates/aggregate_spec.rb'
-- './spec/lib/gitlab/usage/metrics/aggregates/sources/calculations/intersection_spec.rb'
 - './spec/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll_spec.rb'
 - './spec/lib/gitlab/usage/metrics/aggregates/sources/redis_hll_spec.rb'
 - './spec/lib/gitlab/usage/metrics/instrumentations/active_user_count_metric_spec.rb'
-- 
GitLab