From 0ce8a7ae2f050ed3b4b2ec190e98edf31e049c62 Mon Sep 17 00:00:00 2001
From: Marius Bobin <mbobin@gitlab.com>
Date: Thu, 21 Mar 2024 14:08:09 +0200
Subject: [PATCH] Keep scope conditions for associations

If an association defines a scope with arity != 0
then the scope is not evaluated when preloading
that relation. This fixes it.

Changelog: fixed
---
 .../reflection/macro_reflection.rb            | 27 ++++++-
 .../gitlab_patches/partitioning/joins_spec.rb |  9 ++-
 .../partitioning/preloads_spec.rb             | 76 ++++++++++++++++++-
 .../spec/support/models.rb                    | 13 ++++
 4 files changed, 119 insertions(+), 6 deletions(-)

diff --git a/gems/activerecord-gitlab/lib/active_record/gitlab_patches/partitioning/reflection/macro_reflection.rb b/gems/activerecord-gitlab/lib/active_record/gitlab_patches/partitioning/reflection/macro_reflection.rb
index 7ec7da44253e8..17c05645ac1d6 100644
--- a/gems/activerecord-gitlab/lib/active_record/gitlab_patches/partitioning/reflection/macro_reflection.rb
+++ b/gems/activerecord-gitlab/lib/active_record/gitlab_patches/partitioning/reflection/macro_reflection.rb
@@ -5,13 +5,38 @@ module GitlabPatches
     module Partitioning
       module Reflection
         module MacroReflection
+          NO_OWNER = Struct.new(:partition_id).new(1..100_000)
+
+          # We override the method to allow eager loading of partitioned records
+          #
+          # For eager loading the owner is always nil and we supply a benign
+          # object that allows the scope to be evaluated with a query like
+          # `where partition_id between 1 and 100000`
+          # and transforms it into
+          # `where partition_id is not null`
+          # to ensure that no partition is left out by the query.
+          # This is safe because `partition_id` columns are defined as `not null`
+          #
           def scope_for(relation, owner = nil)
             if scope.arity == 1 && owner.nil? && options.key?(:partition_foreign_key)
-              relation
+              relation = relation.instance_exec(NO_OWNER, &scope)
+              if relation_includes_partition_id_condition?(relation)
+                relation.rewhere(relation.table[:partition_id].not_eq(nil))
+              else
+                relation
+              end
             else
               super
             end
           end
+
+          def relation_includes_partition_id_condition?(relation)
+            relation
+              .where_clause
+              .extract_attributes
+              .map(&:name)
+              .include?('partition_id')
+          end
         end
       end
     end
diff --git a/gems/activerecord-gitlab/spec/active_record/gitlab_patches/partitioning/joins_spec.rb b/gems/activerecord-gitlab/spec/active_record/gitlab_patches/partitioning/joins_spec.rb
index 038fae4364482..67568a5a93323 100644
--- a/gems/activerecord-gitlab/spec/active_record/gitlab_patches/partitioning/joins_spec.rb
+++ b/gems/activerecord-gitlab/spec/active_record/gitlab_patches/partitioning/joins_spec.rb
@@ -9,7 +9,8 @@
     join_statement = <<~SQL.squish
       SELECT \"pipelines\".*
       FROM \"pipelines\"
-      INNER JOIN \"jobs\" ON \"jobs\".\"pipeline_id\" = \"pipelines\".\"id\"
+      INNER JOIN \"jobs\" ON \"jobs\".\"partition_id\" IS NOT NULL
+      AND \"jobs\".\"pipeline_id\" = \"pipelines\".\"id\"
       AND \"jobs\".\"partition_id\" = \"pipelines\".\"partition_id\"
       WHERE \"pipelines\".\"partition_id\" = #{pipeline.partition_id}
     SQL
@@ -25,9 +26,11 @@
     join_statement = <<~SQL.squish
       SELECT \"pipelines\".*
       FROM \"pipelines\"
-      INNER JOIN \"jobs\" ON \"jobs\".\"pipeline_id\" = \"pipelines\".\"id\"
+      INNER JOIN \"jobs\" ON \"jobs\".\"partition_id\" IS NOT NULL
+      AND \"jobs\".\"pipeline_id\" = \"pipelines\".\"id\"
       AND \"jobs\".\"partition_id\" = \"pipelines\".\"partition_id\"
-      INNER JOIN \"metadata\" ON \"metadata\".\"job_id\" = \"jobs\".\"id\"
+      INNER JOIN \"metadata\" ON \"metadata\".\"partition_id\" IS NOT NULL
+      AND \"metadata\".\"job_id\" = \"jobs\".\"id\"
       AND \"metadata\".\"partition_id\" = \"jobs\".\"partition_id\"
       WHERE \"pipelines\".\"partition_id\" = #{pipeline.partition_id}
     SQL
diff --git a/gems/activerecord-gitlab/spec/active_record/gitlab_patches/partitioning/preloads_spec.rb b/gems/activerecord-gitlab/spec/active_record/gitlab_patches/partitioning/preloads_spec.rb
index f37a563fe9e84..2bbd903e7ef3c 100644
--- a/gems/activerecord-gitlab/spec/active_record/gitlab_patches/partitioning/preloads_spec.rb
+++ b/gems/activerecord-gitlab/spec/active_record/gitlab_patches/partitioning/preloads_spec.rb
@@ -216,9 +216,11 @@
         AS t2_r2, \"metadata\".\"test_flag\"
         AS t2_r3
         FROM \"pipelines\"
-        LEFT OUTER JOIN \"jobs\" ON \"jobs\".\"pipeline_id\" = \"pipelines\".\"id\"
+        LEFT OUTER JOIN \"jobs\" ON \"jobs\".\"partition_id\" IS NOT NULL
+        AND \"jobs\".\"pipeline_id\" = \"pipelines\".\"id\"
         AND \"jobs\".\"partition_id\" = \"pipelines\".\"partition_id\"
-        LEFT OUTER JOIN \"metadata\" ON \"metadata\".\"job_id\" = \"jobs\".\"id\"
+        LEFT OUTER JOIN \"metadata\" ON \"metadata\".\"partition_id\" IS NOT NULL
+        AND \"metadata\".\"job_id\" = \"jobs\".\"id\"
         AND \"metadata\".\"partition_id\" = \"jobs\".\"partition_id\"
         WHERE \"pipelines\".\"project_id\" = 1
         AND \"pipelines\".\"id\"
@@ -237,5 +239,75 @@
 
       expect(result).to include(preload_statement)
     end
+
+    it 'keeps join conditions from scope' do
+      preload_statement = <<~SQL.squish
+        SELECT \"pipelines\".\"id\"
+        AS t0_r0, \"pipelines\".\"project_id\"
+        AS t0_r1, \"pipelines\".\"partition_id\"
+        AS t0_r2, \"jobs\".\"id\"
+        AS t1_r0, \"jobs\".\"pipeline_id\"
+        AS t1_r1, \"jobs\".\"partition_id\"
+        AS t1_r2, \"jobs\".\"name\"
+        AS t1_r3, \"metadata\".\"id\"
+        AS t2_r0, \"metadata\".\"job_id\"
+        AS t2_r1, \"metadata\".\"partition_id\"
+        AS t2_r2, \"metadata\".\"test_flag\"
+        AS t2_r3
+        FROM \"pipelines\"
+        LEFT OUTER JOIN \"jobs\" ON \"jobs\".\"partition_id\" IS NOT NULL
+        AND \"jobs\".\"pipeline_id\" = \"pipelines\".\"id\"
+        AND \"jobs\".\"partition_id\" = \"pipelines\".\"partition_id\"
+        LEFT OUTER JOIN \"metadata\" ON \"metadata\".\"test_flag\" = 1
+        AND \"metadata\".\"partition_id\" IS NOT NULL
+        AND \"metadata\".\"job_id\" = \"jobs\".\"id\"
+        AND \"metadata\".\"partition_id\" = \"jobs\".\"partition_id\"
+        WHERE \"pipelines\".\"project_id\" = 1
+        AND \"pipelines\".\"id\"
+        IN (#{pipeline.id}, #{other_pipeline.id})
+        AND \"pipelines\".\"partition_id\" = 100
+      SQL
+
+      result = QueryRecorder.log do
+        project
+          .pipelines
+          .includes(jobs: :test_metadata)
+          .references(:jobs, :test_metadata)
+          .where(id: [1, 2], partition_id: 100)
+          .to_a
+      end
+
+      expect(result).to include(preload_statement)
+    end
+
+    it 'does rewhere the partition_id condition when missing' do
+      preload_statement = <<~SQL.squish
+        SELECT \"pipelines\".\"id\"
+        AS t0_r0, \"pipelines\".\"project_id\"
+        AS t0_r1, \"pipelines\".\"partition_id\"
+        AS t0_r2, \"jobs\".\"id\"
+        AS t1_r0, \"jobs\".\"pipeline_id\"
+        AS t1_r1, \"jobs\".\"partition_id\"
+        AS t1_r2, \"jobs\".\"name\"
+        AS t1_r3 FROM \"pipelines\"
+        LEFT OUTER JOIN \"jobs\" ON \"jobs\".\"pipeline_id\" = NULL
+        AND \"jobs\".\"pipeline_id\" = \"pipelines\".\"id\"
+        AND \"jobs\".\"partition_id\" = \"pipelines\".\"partition_id\"
+        WHERE \"pipelines\".\"project_id\" = 1
+        AND \"pipelines\".\"id\" IN (1, 2)
+        AND \"pipelines\".\"partition_id\" = 100
+      SQL
+
+      result = QueryRecorder.log do
+        project
+          .pipelines
+          .includes(:unpartitioned_jobs)
+          .references(:unpartitioned_jobs)
+          .where(id: [1, 2], partition_id: 100)
+          .to_a
+      end
+
+      expect(result).to include(preload_statement)
+    end
   end
 end
diff --git a/gems/activerecord-gitlab/spec/support/models.rb b/gems/activerecord-gitlab/spec/support/models.rb
index 445dc8b2ac24b..8a1b37d5a7503 100644
--- a/gems/activerecord-gitlab/spec/support/models.rb
+++ b/gems/activerecord-gitlab/spec/support/models.rb
@@ -22,6 +22,12 @@ class Pipeline < PartitionedRecord
     ->(pipeline) { where(partition_id: pipeline.partition_id) },
     partition_foreign_key: :partition_id,
     dependent: :destroy
+
+  has_many :unpartitioned_jobs,
+    ->(pipeline) { where(pipeline: pipeline).order(id: :desc) },
+    partition_foreign_key: :partition_id,
+    dependent: :destroy,
+    class_name: 'Job'
 end
 
 class Job < PartitionedRecord
@@ -38,6 +44,13 @@ class Job < PartitionedRecord
     inverse_of: :job,
     autosave: true
 
+  has_one :test_metadata,
+    ->(build) { where(partition_id: build.partition_id, test_flag: true) },
+    foreign_key: :job_id,
+    partition_foreign_key: :partition_id,
+    inverse_of: :job,
+    class_name: 'Metadata'
+
   accepts_nested_attributes_for :metadata
 end
 
-- 
GitLab