From 7a75038aadb4db9abadea3c333d298c223eced5e Mon Sep 17 00:00:00 2001 From: Andreas Brandl <abrandl@gitlab.com> Date: Wed, 24 Jun 2020 21:32:06 +0000 Subject: [PATCH] Create time-space partitions in separate schema See https://gitlab.com/gitlab-org/gitlab/-/issues/220321 --- .../unreleased/ab-partition-management.yml | 5 ++ .../active_record_schema_ignore_tables.rb | 3 + ...121135_create_dynamic_partitions_schema.rb | 19 ++++++ db/structure.sql | 5 ++ lib/backup/database.rb | 10 ++- lib/gitlab/database.rb | 7 ++ .../table_management_helpers.rb | 2 +- lib/gitlab/database/schema_helpers.rb | 6 +- lib/tasks/gitlab/db.rake | 5 ++ spec/db/schema_spec.rb | 30 +++++++++ spec/lib/backup/database_spec.rb | 67 +++++++++++++++++++ .../table_management_helpers_spec.rb | 2 +- spec/lib/gitlab/database_spec.rb | 8 +++ spec/support/helpers/partitioning_helpers.rb | 9 ++- 14 files changed, 170 insertions(+), 8 deletions(-) create mode 100644 changelogs/unreleased/ab-partition-management.yml create mode 100644 db/migrate/20200623121135_create_dynamic_partitions_schema.rb create mode 100644 spec/lib/backup/database_spec.rb diff --git a/changelogs/unreleased/ab-partition-management.yml b/changelogs/unreleased/ab-partition-management.yml new file mode 100644 index 0000000000000..c4c9c8ab432ea --- /dev/null +++ b/changelogs/unreleased/ab-partition-management.yml @@ -0,0 +1,5 @@ +--- +title: Create time-space partitions in separate schema gitlab_partitions_dynamic +merge_request: 35137 +author: +type: other diff --git a/config/initializers/active_record_schema_ignore_tables.rb b/config/initializers/active_record_schema_ignore_tables.rb index 661135f8ade36..8ac565f239e25 100644 --- a/config/initializers/active_record_schema_ignore_tables.rb +++ b/config/initializers/active_record_schema_ignore_tables.rb @@ -1,2 +1,5 @@ # Ignore table used temporarily in background migration ActiveRecord::SchemaDumper.ignore_tables = ["untracked_files_for_uploads"] + +# Ignore dynamically managed partitions in static application schema +ActiveRecord::SchemaDumper.ignore_tables += ["#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.*"] diff --git a/db/migrate/20200623121135_create_dynamic_partitions_schema.rb b/db/migrate/20200623121135_create_dynamic_partitions_schema.rb new file mode 100644 index 0000000000000..931a55ebcf4e0 --- /dev/null +++ b/db/migrate/20200623121135_create_dynamic_partitions_schema.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +class CreateDynamicPartitionsSchema < ActiveRecord::Migration[6.0] + include Gitlab::Database::SchemaHelpers + + DOWNTIME = false + + def up + execute 'CREATE SCHEMA gitlab_partitions_dynamic' + + create_comment(:schema, :gitlab_partitions_dynamic, <<~EOS.strip) + Schema to hold partitions managed dynamically from the application, e.g. for time space partitioning. + EOS + end + + def down + execute 'DROP SCHEMA gitlab_partitions_dynamic' + end +end diff --git a/db/structure.sql b/db/structure.sql index 4721c7ed80986..bcc5ae3de7bef 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -1,5 +1,9 @@ SET search_path=public; +CREATE SCHEMA gitlab_partitions_dynamic; + +COMMENT ON SCHEMA gitlab_partitions_dynamic IS 'Schema to hold partitions managed dynamically from the application, e.g. for time space partitioning.'; + CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public; CREATE TABLE public.abuse_reports ( @@ -14121,5 +14125,6 @@ COPY "schema_migrations" (version) FROM STDIN; 20200622235737 20200623000148 20200623000320 +20200623121135 \. diff --git a/lib/backup/database.rb b/lib/backup/database.rb index 7e457c4982d6f..d4c1ce260e491 100644 --- a/lib/backup/database.rb +++ b/lib/backup/database.rb @@ -27,12 +27,18 @@ def dump progress.print "Dumping PostgreSQL database #{config['database']} ... " pg_env pgsql_args = ["--clean"] # Pass '--clean' to include 'DROP TABLE' statements in the DB dump. + if Gitlab.config.backup.pg_schema - pgsql_args << "-n" + pgsql_args << '-n' pgsql_args << Gitlab.config.backup.pg_schema + + Gitlab::Database::EXTRA_SCHEMAS.each do |schema| + pgsql_args << '-n' + pgsql_args << schema.to_s + end end - spawn('pg_dump', *pgsql_args, config['database'], out: compress_wr) + Process.spawn('pg_dump', *pgsql_args, config['database'], out: compress_wr) end compress_wr.close diff --git a/lib/gitlab/database.rb b/lib/gitlab/database.rb index 02005be1f6a78..8d28f09d2ad21 100644 --- a/lib/gitlab/database.rb +++ b/lib/gitlab/database.rb @@ -22,6 +22,13 @@ module Database MIN_SCHEMA_VERSION = 20190506135400 MIN_SCHEMA_GITLAB_VERSION = '11.11.0' + # Schema we store dynamically managed partitions in + DYNAMIC_PARTITIONS_SCHEMA = :gitlab_partitions_dynamic + + # This is an extensive list of postgres schemas owned by GitLab + # It does not include the default public schema + EXTRA_SCHEMAS = [DYNAMIC_PARTITIONS_SCHEMA].freeze + define_histogram :gitlab_database_transaction_seconds do docstring "Time spent in database transactions, in seconds" end diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb index abc4b2ba5461f..2acc51c171026 100644 --- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb +++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb @@ -152,7 +152,7 @@ def to_sql_date_literal(date) end def create_range_partition_safely(partition_name, table_name, lower_bound, upper_bound) - if table_exists?(partition_name) + if table_exists?(table_for_range_partition(partition_name)) # rubocop:disable Gitlab/RailsLogger Rails.logger.warn "Partition not created because it already exists" \ " (this may be due to an aborted migration or similar): partition_name: #{partition_name}" diff --git a/lib/gitlab/database/schema_helpers.rb b/lib/gitlab/database/schema_helpers.rb index ad10fd610674e..34daafd06de3a 100644 --- a/lib/gitlab/database/schema_helpers.rb +++ b/lib/gitlab/database/schema_helpers.rb @@ -84,9 +84,13 @@ def assert_not_in_transaction_block(scope:) private + def table_for_range_partition(partition_name) + "#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{partition_name}" + end + def create_range_partition(partition_name, table_name, lower_bound, upper_bound) execute(<<~SQL) - CREATE TABLE #{partition_name} PARTITION OF #{table_name} + CREATE TABLE #{table_for_range_partition(partition_name)} PARTITION OF #{table_name} FOR VALUES FROM (#{lower_bound}) TO (#{upper_bound}) SQL end diff --git a/lib/tasks/gitlab/db.rake b/lib/tasks/gitlab/db.rake index 4917d496d0714..dc67c26042a38 100644 --- a/lib/tasks/gitlab/db.rake +++ b/lib/tasks/gitlab/db.rake @@ -39,6 +39,11 @@ namespace :gitlab do # PG: http://www.postgresql.org/docs/current/static/ddl-depend.html # Add `IF EXISTS` because cascade could have already deleted a table. tables.each { |t| connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name(t)} CASCADE") } + + # Drop all extra schema objects GitLab owns + Gitlab::Database::EXTRA_SCHEMAS.each do |schema| + connection.execute("DROP SCHEMA IF EXISTS #{connection.quote_table_name(schema)}") + end end desc 'GitLab | DB | Configures the database by running migrate, or by loading the schema and seeding if needed' diff --git a/spec/db/schema_spec.rb b/spec/db/schema_spec.rb index 51ffd143836d9..be138992a6047 100644 --- a/spec/db/schema_spec.rb +++ b/spec/db/schema_spec.rb @@ -202,6 +202,36 @@ end end + context 'existence of Postgres schemas' do + def get_schemas + sql = <<~SQL + SELECT schema_name FROM + information_schema.schemata + WHERE + NOT schema_name ~* '^pg_' AND NOT schema_name = 'information_schema' + AND catalog_name = current_database() + SQL + + ApplicationRecord.connection.select_all(sql).map do |row| + row['schema_name'] + end + end + + it 'we have a public schema' do + expect(get_schemas).to include('public') + end + + Gitlab::Database::EXTRA_SCHEMAS.each do |schema| + it "we have a '#{schema}' schema'" do + expect(get_schemas).to include(schema.to_s) + end + end + + it 'we do not have unexpected schemas' do + expect(get_schemas.size).to eq(Gitlab::Database::EXTRA_SCHEMAS.size + 1) + end + end + private def retrieve_columns_name_with_jsonb diff --git a/spec/lib/backup/database_spec.rb b/spec/lib/backup/database_spec.rb new file mode 100644 index 0000000000000..81a5219680f39 --- /dev/null +++ b/spec/lib/backup/database_spec.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Backup::Database do + let(:progress) { double('progress', print: nil, puts: nil) } + + describe '#dump' do + subject { described_class.new(progress).dump } + + let(:pg_schema) { nil } + let(:backup_config) { double('config', pg_schema: pg_schema, path: File.join(Rails.root, 'tmp')) } + + before do + allow(Settings).to receive(:backup).and_return(backup_config) + allow(Process).to receive(:waitpid) + end + + it 'does not limit pg_dump to any specific schema' do + expect(Process).to receive(:spawn) do |*cmd, _| + expect(cmd.join(' ')).not_to include('-n') + end + + subject + end + + it 'includes option to drop objects before restoration' do + expect(Process).to receive(:spawn) do |*cmd, _| + expect(cmd.join(' ')).to include('--clean') + end + + subject + end + + context 'with pg_schema configured explicitly' do + let(:pg_schema) { 'some_schema' } + + it 'calls pg_dump' do + expect(Process).to receive(:spawn) do |*cmd, _| + expect(cmd.join(' ')).to start_with('pg_dump') + end + + subject + end + + it 'limits the psql dump to the specified schema' do + expect(Process).to receive(:spawn) do |*cmd, _| + expect(cmd.join(' ')).to include("-n #{pg_schema}") + end + + subject + end + + context 'extra schemas' do + Gitlab::Database::EXTRA_SCHEMAS.each do |schema| + it "includes the extra schema #{schema}" do + expect(Process).to receive(:spawn) do |*cmd, _| + expect(cmd.join(' ')).to include("-n #{schema}") + end + + subject + end + end + end + end + end +end diff --git a/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb b/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb index db7a65742c5f5..576ee1fc4c695 100644 --- a/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb +++ b/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb @@ -275,7 +275,7 @@ describe '#drop_partitioned_table_for' do let(:expected_tables) do - %w[000000 201912 202001 202002].map { |suffix| "#{partitioned_table}_#{suffix}" }.unshift(partitioned_table) + %w[000000 201912 202001 202002].map { |suffix| "#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{partitioned_table}_#{suffix}" }.unshift(partitioned_table) end context 'when the table is not allowed' do diff --git a/spec/lib/gitlab/database_spec.rb b/spec/lib/gitlab/database_spec.rb index b47ce374c8e5b..7a54ce6b8b763 100644 --- a/spec/lib/gitlab/database_spec.rb +++ b/spec/lib/gitlab/database_spec.rb @@ -7,6 +7,14 @@ stub_const('MigrationTest', Class.new { include Gitlab::Database }) end + describe 'EXTRA_SCHEMAS' do + it 'contains only schemas starting with gitlab_ prefix' do + described_class::EXTRA_SCHEMAS.each do |schema| + expect(schema.to_s).to start_with('gitlab_') + end + end + end + describe '.config' do it 'returns a Hash' do expect(described_class.config).to be_an_instance_of(Hash) diff --git a/spec/support/helpers/partitioning_helpers.rb b/spec/support/helpers/partitioning_helpers.rb index 98a13915d7698..5d8466e1ef4a9 100644 --- a/spec/support/helpers/partitioning_helpers.rb +++ b/spec/support/helpers/partitioning_helpers.rb @@ -9,7 +9,7 @@ def expect_table_partitioned_by(table, columns, part_type: :range) end def expect_range_partition_of(partition_name, table_name, min_value, max_value) - definition = find_partition_definition(partition_name) + definition = find_partition_definition(partition_name, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA) expect(definition).not_to be_nil expect(definition['base_table']).to eq(table_name.to_s) @@ -40,7 +40,7 @@ def find_partitioned_columns(table) SQL end - def find_partition_definition(partition) + def find_partition_definition(partition, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA) connection.select_one(<<~SQL) select parent_class.relname as base_table, @@ -48,7 +48,10 @@ def find_partition_definition(partition) from pg_class inner join pg_inherits i on pg_class.oid = inhrelid inner join pg_class parent_class on parent_class.oid = inhparent - where pg_class.relname = '#{partition}' and pg_class.relispartition; + inner join pg_namespace ON pg_namespace.oid = pg_class.relnamespace + where pg_namespace.nspname = '#{schema}' + and pg_class.relname = '#{partition}' + and pg_class.relispartition SQL end end -- GitLab