From 7eb80067a2d78c5d200683d5d3faadc77049e121 Mon Sep 17 00:00:00 2001
From: Marius Bobin <mbobin@gitlab.com>
Date: Wed, 15 Nov 2023 11:34:59 +0000
Subject: [PATCH] Automatically create next partitions for CI tables

---
 app/models/ci/pipeline.rb                     |   4 +-
 app/models/concerns/ci/partitionable.rb       |   4 +-
 config/initializers/postgres_partitioning.rb  |   1 +
 db/structure.sql                              | 156 +++++++++---------
 ...cond_partition_for_builds_metadata_spec.rb |  16 +-
 spec/models/concerns/ci/partitionable_spec.rb |  58 ++++++-
 6 files changed, 151 insertions(+), 88 deletions(-)

diff --git a/app/models/ci/pipeline.rb b/app/models/ci/pipeline.rb
index cf3efc5998fe1..54aa3d78cf35f 100644
--- a/app/models/ci/pipeline.rb
+++ b/app/models/ci/pipeline.rb
@@ -35,6 +35,8 @@ class Pipeline < Ci::ApplicationRecord
 
     CANCELABLE_STATUSES = (Ci::HasStatus::CANCELABLE_STATUSES + ['manual']).freeze
     UNLOCKABLE_STATUSES = (Ci::Pipeline.completed_statuses + [:manual]).freeze
+    INITIAL_PARTITION_VALUE = 100
+    NEXT_PARTITION_VALUE = 101
 
     paginates_per 15
 
@@ -596,7 +598,7 @@ def self.auto_devops_pipelines_completed_total
     end
 
     def self.current_partition_value
-      100
+      INITIAL_PARTITION_VALUE
     end
 
     def self.object_hierarchy(relation, options = {})
diff --git a/app/models/concerns/ci/partitionable.rb b/app/models/concerns/ci/partitionable.rb
index c4b1281fa72a0..aaf07bfee3a6d 100644
--- a/app/models/concerns/ci/partitionable.rb
+++ b/app/models/concerns/ci/partitionable.rb
@@ -106,7 +106,9 @@ def handle_partitionable_ddl(partitioned)
 
         partitioned_by :partition_id,
           strategy: :ci_sliding_list,
-          next_partition_if: proc { false },
+          next_partition_if: ->(latest_partition) do
+            latest_partition.blank? || Ci::Pipeline::NEXT_PARTITION_VALUE > latest_partition.value
+          end,
           detach_partition_if: proc { false },
           # Most of the db tasks are run in a weekly basis, e.g. execute_batched_migrations.
           # Therefore, let's start with 1.week and see how it'd go.
diff --git a/config/initializers/postgres_partitioning.rb b/config/initializers/postgres_partitioning.rb
index 073b487ff7310..458feacba0d8e 100644
--- a/config/initializers/postgres_partitioning.rb
+++ b/config/initializers/postgres_partitioning.rb
@@ -9,6 +9,7 @@
     Ci::RunnerManagerBuild,
     Ci::JobAnnotation,
     Ci::BuildMetadata,
+    CommitStatus,
     BatchedGitRefUpdates::Deletion,
     Users::ProjectVisit,
     Users::GroupVisit
diff --git a/db/structure.sql b/db/structure.sql
index b259755534e6a..b3e11f46f6eb8 100644
--- a/db/structure.sql
+++ b/db/structure.sql
@@ -787,6 +787,84 @@ CREATE TABLE batched_background_migration_job_transition_logs (
 )
 PARTITION BY RANGE (created_at);
 
+CREATE TABLE p_ci_builds (
+    status character varying,
+    finished_at timestamp without time zone,
+    created_at timestamp without time zone,
+    updated_at timestamp without time zone,
+    started_at timestamp without time zone,
+    runner_id integer,
+    coverage double precision,
+    commit_id integer,
+    name character varying,
+    options text,
+    allow_failure boolean DEFAULT false NOT NULL,
+    stage character varying,
+    trigger_request_id integer,
+    stage_idx integer,
+    tag boolean,
+    ref character varying,
+    user_id integer,
+    type character varying,
+    target_url character varying,
+    description character varying,
+    project_id integer,
+    erased_by_id integer,
+    erased_at timestamp without time zone,
+    artifacts_expire_at timestamp without time zone,
+    environment character varying,
+    "when" character varying,
+    yaml_variables text,
+    queued_at timestamp without time zone,
+    lock_version integer DEFAULT 0,
+    coverage_regex character varying,
+    auto_canceled_by_id integer,
+    retried boolean,
+    protected boolean,
+    failure_reason integer,
+    scheduled_at timestamp with time zone,
+    token_encrypted character varying,
+    upstream_pipeline_id integer,
+    resource_group_id bigint,
+    waiting_for_resource_at timestamp with time zone,
+    processed boolean,
+    scheduling_type smallint,
+    id bigint NOT NULL,
+    stage_id bigint,
+    partition_id bigint NOT NULL,
+    auto_canceled_by_partition_id bigint DEFAULT 100 NOT NULL,
+    auto_canceled_by_id_convert_to_bigint bigint,
+    commit_id_convert_to_bigint bigint,
+    erased_by_id_convert_to_bigint bigint,
+    project_id_convert_to_bigint bigint,
+    runner_id_convert_to_bigint bigint,
+    trigger_request_id_convert_to_bigint bigint,
+    upstream_pipeline_id_convert_to_bigint bigint,
+    user_id_convert_to_bigint bigint,
+    CONSTRAINT check_1e2fbd1b39 CHECK ((lock_version IS NOT NULL))
+)
+PARTITION BY LIST (partition_id);
+
+CREATE TABLE p_ci_builds_metadata (
+    project_id integer NOT NULL,
+    timeout integer,
+    timeout_source integer DEFAULT 1 NOT NULL,
+    interruptible boolean,
+    config_options jsonb,
+    config_variables jsonb,
+    has_exposed_artifacts boolean,
+    environment_auto_stop_in character varying(255),
+    expanded_environment_name character varying(255),
+    secrets jsonb DEFAULT '{}'::jsonb NOT NULL,
+    build_id bigint NOT NULL,
+    id bigint NOT NULL,
+    runtime_runner_features jsonb DEFAULT '{}'::jsonb NOT NULL,
+    id_tokens jsonb DEFAULT '{}'::jsonb NOT NULL,
+    partition_id bigint NOT NULL,
+    debug_trace_enabled boolean DEFAULT false NOT NULL
+)
+PARTITION BY LIST (partition_id);
+
 CREATE TABLE p_ci_job_annotations (
     id bigint NOT NULL,
     partition_id bigint NOT NULL,
@@ -13609,64 +13687,6 @@ CREATE TABLE ci_build_trace_metadata (
     partition_id bigint NOT NULL
 );
 
-CREATE TABLE p_ci_builds (
-    status character varying,
-    finished_at timestamp without time zone,
-    created_at timestamp without time zone,
-    updated_at timestamp without time zone,
-    started_at timestamp without time zone,
-    runner_id integer,
-    coverage double precision,
-    commit_id integer,
-    name character varying,
-    options text,
-    allow_failure boolean DEFAULT false NOT NULL,
-    stage character varying,
-    trigger_request_id integer,
-    stage_idx integer,
-    tag boolean,
-    ref character varying,
-    user_id integer,
-    type character varying,
-    target_url character varying,
-    description character varying,
-    project_id integer,
-    erased_by_id integer,
-    erased_at timestamp without time zone,
-    artifacts_expire_at timestamp without time zone,
-    environment character varying,
-    "when" character varying,
-    yaml_variables text,
-    queued_at timestamp without time zone,
-    lock_version integer DEFAULT 0,
-    coverage_regex character varying,
-    auto_canceled_by_id integer,
-    retried boolean,
-    protected boolean,
-    failure_reason integer,
-    scheduled_at timestamp with time zone,
-    token_encrypted character varying,
-    upstream_pipeline_id integer,
-    resource_group_id bigint,
-    waiting_for_resource_at timestamp with time zone,
-    processed boolean,
-    scheduling_type smallint,
-    id bigint NOT NULL,
-    stage_id bigint,
-    partition_id bigint NOT NULL,
-    auto_canceled_by_partition_id bigint DEFAULT 100 NOT NULL,
-    auto_canceled_by_id_convert_to_bigint bigint,
-    commit_id_convert_to_bigint bigint,
-    erased_by_id_convert_to_bigint bigint,
-    project_id_convert_to_bigint bigint,
-    runner_id_convert_to_bigint bigint,
-    trigger_request_id_convert_to_bigint bigint,
-    upstream_pipeline_id_convert_to_bigint bigint,
-    user_id_convert_to_bigint bigint,
-    CONSTRAINT check_1e2fbd1b39 CHECK ((lock_version IS NOT NULL))
-)
-PARTITION BY LIST (partition_id);
-
 CREATE TABLE ci_builds (
     status character varying,
     finished_at timestamp without time zone,
@@ -13733,26 +13753,6 @@ CREATE SEQUENCE ci_builds_id_seq
 
 ALTER SEQUENCE ci_builds_id_seq OWNED BY p_ci_builds.id;
 
-CREATE TABLE p_ci_builds_metadata (
-    project_id integer NOT NULL,
-    timeout integer,
-    timeout_source integer DEFAULT 1 NOT NULL,
-    interruptible boolean,
-    config_options jsonb,
-    config_variables jsonb,
-    has_exposed_artifacts boolean,
-    environment_auto_stop_in character varying(255),
-    expanded_environment_name character varying(255),
-    secrets jsonb DEFAULT '{}'::jsonb NOT NULL,
-    build_id bigint NOT NULL,
-    id bigint NOT NULL,
-    runtime_runner_features jsonb DEFAULT '{}'::jsonb NOT NULL,
-    id_tokens jsonb DEFAULT '{}'::jsonb NOT NULL,
-    partition_id bigint NOT NULL,
-    debug_trace_enabled boolean DEFAULT false NOT NULL
-)
-PARTITION BY LIST (partition_id);
-
 CREATE SEQUENCE ci_builds_metadata_id_seq
     START WITH 1
     INCREMENT BY 1
diff --git a/spec/migrations/20221102090943_create_second_partition_for_builds_metadata_spec.rb b/spec/migrations/20221102090943_create_second_partition_for_builds_metadata_spec.rb
index b4bd51363830f..52a6242f38390 100644
--- a/spec/migrations/20221102090943_create_second_partition_for_builds_metadata_spec.rb
+++ b/spec/migrations/20221102090943_create_second_partition_for_builds_metadata_spec.rb
@@ -14,7 +14,9 @@
       end
 
       it 'creates a new partition' do
-        expect { migrate! }.to change { partitions_count }.by(1)
+        migrate!
+
+        expect(partition_101_exists?).to be(true)
       end
     end
 
@@ -24,7 +26,7 @@
       end
 
       it 'does not create the partition' do
-        expect { migrate! }.not_to change { partitions_count }
+        expect { migrate! }.not_to change { partition_101_exists? }
       end
     end
   end
@@ -38,7 +40,7 @@
       it 'removes the partition' do
         migrate!
 
-        expect { migration.down }.to change { partitions_count }.by(-1)
+        expect { migration.down }.to change { partition_101_exists? }.to(false)
       end
     end
 
@@ -50,12 +52,14 @@
       it 'does not change the partitions count' do
         migrate!
 
-        expect { migration.down }.not_to change { partitions_count }
+        expect { migration.down }.not_to change { partition_101_exists? }
       end
     end
   end
 
-  def partitions_count
-    Gitlab::Database::PostgresPartition.for_parent_table(:p_ci_builds_metadata).size
+  def partition_101_exists?
+    Gitlab::Database::PostgresPartition
+      .for_parent_table(:p_ci_builds_metadata)
+      .where(name: :ci_builds_metadata_101).any?
   end
 end
diff --git a/spec/models/concerns/ci/partitionable_spec.rb b/spec/models/concerns/ci/partitionable_spec.rb
index 6daafc78cffce..a4edfe2bdbdda 100644
--- a/spec/models/concerns/ci/partitionable_spec.rb
+++ b/spec/models/concerns/ci/partitionable_spec.rb
@@ -5,6 +5,12 @@
 RSpec.describe Ci::Partitionable do
   let(:ci_model) { Class.new(Ci::ApplicationRecord) }
 
+  around do |ex|
+    Gitlab::Database::SharedModel.using_connection(ci_model.connection) do
+      ex.run
+    end
+  end
+
   describe 'partitionable models inclusion' do
     subject { ci_model.include(described_class) }
 
@@ -61,10 +67,58 @@
 
     context 'when partitioned is true' do
       let(:partitioned) { true }
+      let(:partitioning_strategy) { ci_model.partitioning_strategy }
 
       it { expect(ci_model.ancestors).to include(PartitionedTable) }
-      it { expect(ci_model.partitioning_strategy).to be_a(Gitlab::Database::Partitioning::CiSlidingListStrategy) }
-      it { expect(ci_model.partitioning_strategy.partitioning_key).to eq(:partition_id) }
+      it { expect(partitioning_strategy).to be_a(Gitlab::Database::Partitioning::CiSlidingListStrategy) }
+      it { expect(partitioning_strategy.partitioning_key).to eq(:partition_id) }
+
+      describe 'next_partition_if callback' do
+        let(:active_partition) { partitioning_strategy.active_partition }
+
+        let(:table_options) do
+          {
+            primary_key: [:id, :partition_id],
+            options: 'PARTITION BY LIST (partition_id)',
+            if_not_exists: false
+          }
+        end
+
+        before do
+          ci_model.connection.create_table(:_test_table_name, **table_options) do |t|
+            t.bigserial :id, null: false
+            t.bigint :partition_id, null: false
+          end
+
+          ci_model.table_name = :_test_table_name
+        end
+
+        subject(:value) { partitioning_strategy.next_partition_if.call(active_partition) }
+
+        context 'without any existing partitions' do
+          it { is_expected.to eq(true) }
+        end
+
+        context 'with initial partition attached' do
+          before do
+            ci_model.connection.execute(<<~SQL)
+              CREATE TABLE IF NOT EXISTS _test_table_name_100 PARTITION OF _test_table_name FOR VALUES IN (100);
+            SQL
+          end
+
+          it { is_expected.to eq(true) }
+        end
+
+        context 'with an existing partition for partition_id = 101' do
+          before do
+            ci_model.connection.execute(<<~SQL)
+              CREATE TABLE IF NOT EXISTS _test_table_name_101 PARTITION OF _test_table_name FOR VALUES IN (101);
+            SQL
+          end
+
+          it { is_expected.to eq(false) }
+        end
+      end
     end
 
     context 'when partitioned is false' do
-- 
GitLab