diff --git a/keeps/helpers/file_helper.rb b/keeps/helpers/file_helper.rb new file mode 100644 index 0000000000000000000000000000000000000000..d6e57e196facd3ca81137b19aa46edf8f64eee03 --- /dev/null +++ b/keeps/helpers/file_helper.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require 'rubocop' + +module Keeps + module Helpers + class FileHelper + def initialize(file) + @file = file + @source = RuboCop::ProcessedSource.from_file(file, RuboCop::ConfigStore.new.for_file('.').target_ruby_version) + @rewriter = Parser::Source::TreeRewriter.new(@source.buffer) + end + + def replace_method_content(method_name, content, strip_comments_from_file: false) + method = source.ast.each_node(:class).first.each_node(:def).find do |child| + child.method_name == method_name.to_sym + end + + rewriter.replace(method.loc.expression, content) + + strip_comments if strip_comments_from_file + + File.write(file, process) + + process + end + + private + + attr_reader :file, :source, :rewriter, :corrector + + # Strip comments from the source file, except the for frozen_string_literal: true + def strip_comments + source.comments.each do |comment| + next if comment.text.include?('frozen_string_literal: true') + + rewriter.remove(comment_range(comment)) + end + end + + # Finds the proper range for the comment. + # + # @Note inline comments can cause trailing whitespaces. + # For such cases, the extra whitespace needs to be removed + def comment_range(comment) + range = comment.loc.expression + adjusted_range = range.adjust(begin_pos: -1) + + return range if comment.document? + + adjusted_range.source.start_with?(' ') ? adjusted_range : range + end + + def process + @process ||= rewriter.process.lstrip.gsub(/\n{3,}/, "\n\n") + end + end + end +end diff --git a/keeps/remove_duplicated_indexes.rb b/keeps/remove_duplicated_indexes.rb new file mode 100644 index 0000000000000000000000000000000000000000..15dc4282a13b01340f5dba0d6500b366c2a7e056 --- /dev/null +++ b/keeps/remove_duplicated_indexes.rb @@ -0,0 +1,202 @@ +# frozen_string_literal: true + +require 'rails/generators' +require 'rails/generators/active_record' +require 'rails/generators/active_record/migration/migration_generator' + +require_relative 'helpers/file_helper' +require_relative '../spec/support/helpers/database/duplicate_indexes' + +module Keeps + # This is an implementation of a ::Gitlab::Housekeeper::Keep. This keep will look for duplicated indexes + # and it will generate the corresponding files to have the index dropped. For each index to be dropped, the Keep will + # generate a new migration and its schema migration file. It also updates the `db/schema.sql` for each migration file. + # + # This keep uses the test databases to generate a updated version of the schema. Each time the keep is invoked it will + # recreate the `gitlabhq_test` and `gitlabhq_test_ci` databases. + # + # You can run it individually with: + # + # ``` + # bundle exec gitlab-housekeeper -d \ + # -k Keeps::RemoveDuplicatedIndexes + # ``` + class RemoveDuplicatedIndexes < ::Gitlab::Housekeeper::Keep + MIGRATION_TEMPLATE = 'generator_templates/active_record/migration/' + DEFAULT_REVIEWER_GROUP = 'database' + + def initialize + ::Gitlab::Application.load_tasks + ::ActiveRecord::Generators::MigrationGenerator.source_root(MIGRATION_TEMPLATE) + + @indexes_to_drop = {} + + reset_db + migrate + load_indexes_to_drop + + super + end + + def each_change + indexes_to_drop.each do |table_name, indexes| + change = build_change(table_name, indexes) + change.changed_files = [] + + indexes.each do |index_to_drop, _| + migration_file, migration_number = generate_migration_file(table_name, index_to_drop) + change.changed_files << migration_file + change.changed_files << Pathname.new('db').join('schema_migrations', migration_number).to_s + end + + migrate + + change.changed_files << Pathname.new('db').join('structure.sql').to_s + + yield(change) + + reset_db + end + end + + private + + attr_reader :indexes_to_drop + + def load_indexes_to_drop + establish_test_db_connection do |connection| + connection.tables.sort.each do |table| + # Skip partitioned tables for now + next if Gitlab::Database::PostgresPartition.partition_exists?(table) + + result = process_result(Database::DuplicateIndexes.new(table, connection.indexes(table)).duplicate_indexes) + + next if result.empty? + + indexes_to_drop[table] = result + end + end + end + + def build_change(table_name, indexes) + change = ::Gitlab::Housekeeper::Change.new + change.title = "Remove duplicated index from #{table_name}".truncate(70, omission: '') + change.identifiers = [self.class.name.demodulize, table_name] + change.labels = labels(table_name) + change.reviewers = pick_reviewers(table_name, change.identifiers).uniq + + removes_section = indexes.map do |index_to_drop, matching_indexes| + matching_indexes_table = matching_indexes.map do |idx| + <<-MARKDOWN.strip + | `#{idx.name}` | #{idx.columns.map { |col| "`#{col[:name]} #{col[:order]}`" }.join(', ')} | + MARKDOWN + end + + <<~MARKDOWN.strip + Drop `#{index_to_drop.name}` as it's already covered by: + | Index | Columns | + | ----- | ------ | + #{matching_indexes_table.join("\n")} + MARKDOWN + end + + change.description = <<~MARKDOWN.chomp + ## What does this MR do and why? + + Remove duplicated index from `#{table_name}` table. + + ### It removes: + + #{removes_section.join("\n")} + + It is possible that this MR will still need some changes to drop the index from the database. + Currently, the `gitlab-housekeeper` is not always capable of removing all references, so you must check the diff and pipeline failures to confirm if there are any issues. + Ensure that the index exists in the production database by checking Joe Bot trough https://console.postgres.ai/gitlab. + If the index was already removed or if the index it's being removed in another merge request, consider closing this merge request. + MARKDOWN + + change + end + + def process_result(duplicated_indexes) + duplicates_map = Hash.new { |h, k| h[k] = [] } + + duplicated_indexes.each do |index, duplicates| + duplicates.each do |duplicate| + duplicates_map[duplicate] << index + end + end + + duplicates_map + end + + def generate_migration_file(table_name, index_to_drop) + migration_name = "drop_#{index_to_drop.name}".truncate(100, omission: '') + generator = ::ActiveRecord::Generators::MigrationGenerator.new([migration_name]) + migration_content = <<~RUBY.strip + disable_ddl_transaction! + + TABLE_NAME = :#{table_name} + INDEX_NAME = :#{index_to_drop.name} + COLUMN_NAMES = #{index_to_drop.columns.map { |col| col[:name].to_sym }} + + def up + remove_concurrent_index_by_name(TABLE_NAME, INDEX_NAME) + end + + def down + add_concurrent_index(TABLE_NAME, COLUMN_NAMES, name: INDEX_NAME) + end + RUBY + + migration_file = generator.invoke_all.first + file_helper = ::Keeps::Helpers::FileHelper.new(migration_file) + file_helper.replace_method_content(:change, migration_content, strip_comments_from_file: true) + + ::Gitlab::Housekeeper::Shell.execute('rubocop', '-a', migration_file) + + [migration_file, generator.migration_number] + end + + def pick_reviewers(table_name, identifiers) + table_info = Gitlab::Database::Dictionary.entries.find_by_table_name(table_name) + + table_info.feature_categories.map do |feature_category| + group = groups_helper.group_for_feature_category(feature_category) + + group = groups_helper.group_for_feature_category(DEFAULT_REVIEWER_GROUP) if group['backend_engineers'].empty? + + groups_helper.pick_reviewer(group, identifiers) + end + end + + def labels(table_name) + table_info = Gitlab::Database::Dictionary.entries.find_by_table_name(table_name) + + group_labels = table_info.feature_categories.map do |feature_category| + groups_helper.group_for_feature_category(feature_category)['label'] + end + + group_labels + %w[maintenance::scalability type::maintenance Category:Database] + end + + def establish_test_db_connection + # rubocop:disable Database/EstablishConnection -- We should use test database only + yield ActiveRecord::Base.establish_connection(ActiveRecord::Base.configurations.find_db_config('test')).connection + # rubocop:enable Database/EstablishConnection + end + + def reset_db + ApplicationRecord.clear_all_connections! + ::Gitlab::Housekeeper::Shell.execute({ 'RAILS_ENV' => 'test' }, 'rails', 'db:reset') + end + + def migrate + ::Gitlab::Housekeeper::Shell.execute({ 'RAILS_ENV' => 'test' }, 'rails', 'db:migrate') + end + + def groups_helper + @groups_helper ||= ::Keeps::Helpers::Groups.new + end + end +end diff --git a/spec/keeps/helpers/file_helper_spec.rb b/spec/keeps/helpers/file_helper_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..05391f5bf3d833ea36c37f53a6f9ca1b5122a5fc --- /dev/null +++ b/spec/keeps/helpers/file_helper_spec.rb @@ -0,0 +1,140 @@ +# frozen_string_literal: true + +require 'spec_helper' +require './keeps/helpers/file_helper' + +RSpec.describe Keeps::Helpers::FileHelper, feature_category: :tooling do + let(:helper) { described_class.new(temp_file.path) } + let(:temp_file) { Tempfile.new(filename) } + let(:unparsed_content) do + <<~RUBY + # Migration type +class+ + # frozen_string_literal: true + + # See https://docs.gitlab.com/ee/development/migration_style_guide.html + # for more information on how to write migrations for GitLab. + + =begin + This migration adds + a new column to project + =end + class AddColToProjects < Gitlab::Database::Migration[2.2] + milestone '16.11' # Inline comment + + def change + add_column :projects, :bool_col, :boolean, default: false, null: false # adds a new column + end + end# Another inline comment + RUBY + end + + before do + temp_file.write(unparsed_content) + temp_file.rewind + temp_file.close + end + + after do + temp_file.unlink + end + + describe '#replace_method_content' do + before do + helper.replace_method_content(:change, content, strip_comments_from_file: strip_content) + end + + context 'when striping comments from file' do + let(:filename) { 'migration_two.txt' } + let(:strip_content) { true } + let(:content) do + <<~RUBY + disable_ddl_transaction! + + def up + add_column :projects, :bool_col, :boolean, default: false, null: false # adds a boolean type col + end + + def down + remove_column :projects, :bool_col, if_exists: true + end + RUBY + end + + let(:parsed_file) do + <<~RUBY + # frozen_string_literal: true + + class AddColToProjects < Gitlab::Database::Migration[2.2] + milestone '16.11' + + disable_ddl_transaction! + + def up + add_column :projects, :bool_col, :boolean, default: false, null: false # adds a boolean type col + end + + def down + remove_column :projects, :bool_col, if_exists: true + end + + end + RUBY + end + + it 'parses the file as expected' do + expect(temp_file.open.read).to eq(parsed_file) + end + end + + context 'when keeping comments in the file' do + let(:filename) { 'migration_two.txt' } + let(:strip_content) { false } + let(:content) do + <<~RUBY + disable_ddl_transaction! + + def up + add_column :projects, :bool_col, :boolean, default: false, null: false + end + + def down + remove_column :projects, :bool_col, if_exists: true + end + RUBY + end + + let(:parsed_file) do + <<~RUBY + # Migration type +class+ + # frozen_string_literal: true + + # See https://docs.gitlab.com/ee/development/migration_style_guide.html + # for more information on how to write migrations for GitLab. + + =begin + This migration adds + a new column to project + =end + class AddColToProjects < Gitlab::Database::Migration[2.2] + milestone '16.11' # Inline comment + + disable_ddl_transaction! + + def up + add_column :projects, :bool_col, :boolean, default: false, null: false + end + + def down + remove_column :projects, :bool_col, if_exists: true + end + + end# Another inline comment + RUBY + end + + it 'parses the file as expected' do + expect(temp_file.open.read).to eq(parsed_file) + end + end + end +end