From 7a75038aadb4db9abadea3c333d298c223eced5e Mon Sep 17 00:00:00 2001
From: Andreas Brandl <abrandl@gitlab.com>
Date: Wed, 24 Jun 2020 21:32:06 +0000
Subject: [PATCH] Create time-space partitions in separate schema

See https://gitlab.com/gitlab-org/gitlab/-/issues/220321
---
 .../unreleased/ab-partition-management.yml    |  5 ++
 .../active_record_schema_ignore_tables.rb     |  3 +
 ...121135_create_dynamic_partitions_schema.rb | 19 ++++++
 db/structure.sql                              |  5 ++
 lib/backup/database.rb                        | 10 ++-
 lib/gitlab/database.rb                        |  7 ++
 .../table_management_helpers.rb               |  2 +-
 lib/gitlab/database/schema_helpers.rb         |  6 +-
 lib/tasks/gitlab/db.rake                      |  5 ++
 spec/db/schema_spec.rb                        | 30 +++++++++
 spec/lib/backup/database_spec.rb              | 67 +++++++++++++++++++
 .../table_management_helpers_spec.rb          |  2 +-
 spec/lib/gitlab/database_spec.rb              |  8 +++
 spec/support/helpers/partitioning_helpers.rb  |  9 ++-
 14 files changed, 170 insertions(+), 8 deletions(-)
 create mode 100644 changelogs/unreleased/ab-partition-management.yml
 create mode 100644 db/migrate/20200623121135_create_dynamic_partitions_schema.rb
 create mode 100644 spec/lib/backup/database_spec.rb

diff --git a/changelogs/unreleased/ab-partition-management.yml b/changelogs/unreleased/ab-partition-management.yml
new file mode 100644
index 0000000000000..c4c9c8ab432ea
--- /dev/null
+++ b/changelogs/unreleased/ab-partition-management.yml
@@ -0,0 +1,5 @@
+---
+title: Create time-space partitions in separate schema gitlab_partitions_dynamic
+merge_request: 35137
+author:
+type: other
diff --git a/config/initializers/active_record_schema_ignore_tables.rb b/config/initializers/active_record_schema_ignore_tables.rb
index 661135f8ade36..8ac565f239e25 100644
--- a/config/initializers/active_record_schema_ignore_tables.rb
+++ b/config/initializers/active_record_schema_ignore_tables.rb
@@ -1,2 +1,5 @@
 # Ignore table used temporarily in background migration
 ActiveRecord::SchemaDumper.ignore_tables = ["untracked_files_for_uploads"]
+
+# Ignore dynamically managed partitions in static application schema
+ActiveRecord::SchemaDumper.ignore_tables += ["#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.*"]
diff --git a/db/migrate/20200623121135_create_dynamic_partitions_schema.rb b/db/migrate/20200623121135_create_dynamic_partitions_schema.rb
new file mode 100644
index 0000000000000..931a55ebcf4e0
--- /dev/null
+++ b/db/migrate/20200623121135_create_dynamic_partitions_schema.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+class CreateDynamicPartitionsSchema < ActiveRecord::Migration[6.0]
+  include Gitlab::Database::SchemaHelpers
+
+  DOWNTIME = false
+
+  def up
+    execute 'CREATE SCHEMA gitlab_partitions_dynamic'
+
+    create_comment(:schema, :gitlab_partitions_dynamic, <<~EOS.strip)
+      Schema to hold partitions managed dynamically from the application, e.g. for time space partitioning.
+    EOS
+  end
+
+  def down
+    execute 'DROP SCHEMA gitlab_partitions_dynamic'
+  end
+end
diff --git a/db/structure.sql b/db/structure.sql
index 4721c7ed80986..bcc5ae3de7bef 100644
--- a/db/structure.sql
+++ b/db/structure.sql
@@ -1,5 +1,9 @@
 SET search_path=public;
 
+CREATE SCHEMA gitlab_partitions_dynamic;
+
+COMMENT ON SCHEMA gitlab_partitions_dynamic IS 'Schema to hold partitions managed dynamically from the application, e.g. for time space partitioning.';
+
 CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
 
 CREATE TABLE public.abuse_reports (
@@ -14121,5 +14125,6 @@ COPY "schema_migrations" (version) FROM STDIN;
 20200622235737
 20200623000148
 20200623000320
+20200623121135
 \.
 
diff --git a/lib/backup/database.rb b/lib/backup/database.rb
index 7e457c4982d6f..d4c1ce260e491 100644
--- a/lib/backup/database.rb
+++ b/lib/backup/database.rb
@@ -27,12 +27,18 @@ def dump
           progress.print "Dumping PostgreSQL database #{config['database']} ... "
           pg_env
           pgsql_args = ["--clean"] # Pass '--clean' to include 'DROP TABLE' statements in the DB dump.
+
           if Gitlab.config.backup.pg_schema
-            pgsql_args << "-n"
+            pgsql_args << '-n'
             pgsql_args << Gitlab.config.backup.pg_schema
+
+            Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
+              pgsql_args << '-n'
+              pgsql_args << schema.to_s
+            end
           end
 
-          spawn('pg_dump', *pgsql_args, config['database'], out: compress_wr)
+          Process.spawn('pg_dump', *pgsql_args, config['database'], out: compress_wr)
         end
       compress_wr.close
 
diff --git a/lib/gitlab/database.rb b/lib/gitlab/database.rb
index 02005be1f6a78..8d28f09d2ad21 100644
--- a/lib/gitlab/database.rb
+++ b/lib/gitlab/database.rb
@@ -22,6 +22,13 @@ module Database
     MIN_SCHEMA_VERSION = 20190506135400
     MIN_SCHEMA_GITLAB_VERSION = '11.11.0'
 
+    # Schema we store dynamically managed partitions in
+    DYNAMIC_PARTITIONS_SCHEMA = :gitlab_partitions_dynamic
+
+    # This is an extensive list of postgres schemas owned by GitLab
+    # It does not include the default public schema
+    EXTRA_SCHEMAS = [DYNAMIC_PARTITIONS_SCHEMA].freeze
+
     define_histogram :gitlab_database_transaction_seconds do
       docstring "Time spent in database transactions, in seconds"
     end
diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
index abc4b2ba5461f..2acc51c171026 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
@@ -152,7 +152,7 @@ def to_sql_date_literal(date)
         end
 
         def create_range_partition_safely(partition_name, table_name, lower_bound, upper_bound)
-          if table_exists?(partition_name)
+          if table_exists?(table_for_range_partition(partition_name))
             # rubocop:disable Gitlab/RailsLogger
             Rails.logger.warn "Partition not created because it already exists" \
               " (this may be due to an aborted migration or similar): partition_name: #{partition_name}"
diff --git a/lib/gitlab/database/schema_helpers.rb b/lib/gitlab/database/schema_helpers.rb
index ad10fd610674e..34daafd06de3a 100644
--- a/lib/gitlab/database/schema_helpers.rb
+++ b/lib/gitlab/database/schema_helpers.rb
@@ -84,9 +84,13 @@ def assert_not_in_transaction_block(scope:)
 
       private
 
+      def table_for_range_partition(partition_name)
+        "#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{partition_name}"
+      end
+
       def create_range_partition(partition_name, table_name, lower_bound, upper_bound)
         execute(<<~SQL)
-          CREATE TABLE #{partition_name} PARTITION OF #{table_name}
+          CREATE TABLE #{table_for_range_partition(partition_name)} PARTITION OF #{table_name}
           FOR VALUES FROM (#{lower_bound}) TO (#{upper_bound})
         SQL
       end
diff --git a/lib/tasks/gitlab/db.rake b/lib/tasks/gitlab/db.rake
index 4917d496d0714..dc67c26042a38 100644
--- a/lib/tasks/gitlab/db.rake
+++ b/lib/tasks/gitlab/db.rake
@@ -39,6 +39,11 @@ namespace :gitlab do
       # PG: http://www.postgresql.org/docs/current/static/ddl-depend.html
       # Add `IF EXISTS` because cascade could have already deleted a table.
       tables.each { |t| connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name(t)} CASCADE") }
+
+      # Drop all extra schema objects GitLab owns
+      Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
+        connection.execute("DROP SCHEMA IF EXISTS #{connection.quote_table_name(schema)}")
+      end
     end
 
     desc 'GitLab | DB | Configures the database by running migrate, or by loading the schema and seeding if needed'
diff --git a/spec/db/schema_spec.rb b/spec/db/schema_spec.rb
index 51ffd143836d9..be138992a6047 100644
--- a/spec/db/schema_spec.rb
+++ b/spec/db/schema_spec.rb
@@ -202,6 +202,36 @@
     end
   end
 
+  context 'existence of Postgres schemas' do
+    def get_schemas
+      sql = <<~SQL
+        SELECT schema_name FROM
+        information_schema.schemata
+        WHERE
+        NOT schema_name ~* '^pg_' AND NOT schema_name = 'information_schema'
+        AND catalog_name = current_database()
+      SQL
+
+      ApplicationRecord.connection.select_all(sql).map do |row|
+        row['schema_name']
+      end
+    end
+
+    it 'we have a public schema' do
+      expect(get_schemas).to include('public')
+    end
+
+    Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
+      it "we have a '#{schema}' schema'" do
+        expect(get_schemas).to include(schema.to_s)
+      end
+    end
+
+    it 'we do not have unexpected schemas' do
+      expect(get_schemas.size).to eq(Gitlab::Database::EXTRA_SCHEMAS.size + 1)
+    end
+  end
+
   private
 
   def retrieve_columns_name_with_jsonb
diff --git a/spec/lib/backup/database_spec.rb b/spec/lib/backup/database_spec.rb
new file mode 100644
index 0000000000000..81a5219680f39
--- /dev/null
+++ b/spec/lib/backup/database_spec.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Backup::Database do
+  let(:progress) { double('progress', print: nil, puts: nil) }
+
+  describe '#dump' do
+    subject { described_class.new(progress).dump }
+
+    let(:pg_schema) { nil }
+    let(:backup_config) { double('config', pg_schema: pg_schema, path: File.join(Rails.root, 'tmp')) }
+
+    before do
+      allow(Settings).to receive(:backup).and_return(backup_config)
+      allow(Process).to receive(:waitpid)
+    end
+
+    it 'does not limit pg_dump to any specific schema' do
+      expect(Process).to receive(:spawn) do |*cmd, _|
+        expect(cmd.join(' ')).not_to include('-n')
+      end
+
+      subject
+    end
+
+    it 'includes option to drop objects before restoration' do
+      expect(Process).to receive(:spawn) do |*cmd, _|
+        expect(cmd.join(' ')).to include('--clean')
+      end
+
+      subject
+    end
+
+    context 'with pg_schema configured explicitly' do
+      let(:pg_schema) { 'some_schema' }
+
+      it 'calls pg_dump' do
+        expect(Process).to receive(:spawn) do |*cmd, _|
+          expect(cmd.join(' ')).to start_with('pg_dump')
+        end
+
+        subject
+      end
+
+      it 'limits the psql dump to the specified schema' do
+        expect(Process).to receive(:spawn) do |*cmd, _|
+          expect(cmd.join(' ')).to include("-n #{pg_schema}")
+        end
+
+        subject
+      end
+
+      context 'extra schemas' do
+        Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
+          it "includes the extra schema #{schema}" do
+            expect(Process).to receive(:spawn) do |*cmd, _|
+              expect(cmd.join(' ')).to include("-n #{schema}")
+            end
+
+            subject
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb b/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb
index db7a65742c5f5..576ee1fc4c695 100644
--- a/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb
+++ b/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb
@@ -275,7 +275,7 @@
 
   describe '#drop_partitioned_table_for' do
     let(:expected_tables) do
-      %w[000000 201912 202001 202002].map { |suffix| "#{partitioned_table}_#{suffix}" }.unshift(partitioned_table)
+      %w[000000 201912 202001 202002].map { |suffix| "#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{partitioned_table}_#{suffix}" }.unshift(partitioned_table)
     end
 
     context 'when the table is not allowed' do
diff --git a/spec/lib/gitlab/database_spec.rb b/spec/lib/gitlab/database_spec.rb
index b47ce374c8e5b..7a54ce6b8b763 100644
--- a/spec/lib/gitlab/database_spec.rb
+++ b/spec/lib/gitlab/database_spec.rb
@@ -7,6 +7,14 @@
     stub_const('MigrationTest', Class.new { include Gitlab::Database })
   end
 
+  describe 'EXTRA_SCHEMAS' do
+    it 'contains only schemas starting with gitlab_ prefix' do
+      described_class::EXTRA_SCHEMAS.each do |schema|
+        expect(schema.to_s).to start_with('gitlab_')
+      end
+    end
+  end
+
   describe '.config' do
     it 'returns a Hash' do
       expect(described_class.config).to be_an_instance_of(Hash)
diff --git a/spec/support/helpers/partitioning_helpers.rb b/spec/support/helpers/partitioning_helpers.rb
index 98a13915d7698..5d8466e1ef4a9 100644
--- a/spec/support/helpers/partitioning_helpers.rb
+++ b/spec/support/helpers/partitioning_helpers.rb
@@ -9,7 +9,7 @@ def expect_table_partitioned_by(table, columns, part_type: :range)
   end
 
   def expect_range_partition_of(partition_name, table_name, min_value, max_value)
-    definition = find_partition_definition(partition_name)
+    definition = find_partition_definition(partition_name, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA)
 
     expect(definition).not_to be_nil
     expect(definition['base_table']).to eq(table_name.to_s)
@@ -40,7 +40,7 @@ def find_partitioned_columns(table)
     SQL
   end
 
-  def find_partition_definition(partition)
+  def find_partition_definition(partition, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA)
     connection.select_one(<<~SQL)
       select
         parent_class.relname as base_table,
@@ -48,7 +48,10 @@ def find_partition_definition(partition)
       from pg_class
       inner join pg_inherits i on pg_class.oid = inhrelid
       inner join pg_class parent_class on parent_class.oid = inhparent
-      where pg_class.relname = '#{partition}' and pg_class.relispartition;
+      inner join pg_namespace ON pg_namespace.oid = pg_class.relnamespace
+      where pg_namespace.nspname = '#{schema}'
+        and pg_class.relname = '#{partition}'
+        and pg_class.relispartition
     SQL
   end
 end
-- 
GitLab