From f2e2ead8e4988288d617f9f7399e80db3e7c21d7 Mon Sep 17 00:00:00 2001 From: Omar Qunsul <oqunsul@gitlab.com> Date: Wed, 23 Aug 2023 10:33:02 +0000 Subject: [PATCH] Detecting duplicate btree indexes Adding tests to detect duplicate btree indexes. This will help us avoid adding unnecessary indexes Changelog: other --- spec/db/schema_spec.rb | 43 +++ .../helpers/database/duplicate_indexes.rb | 77 +++++ .../helpers/database/duplicate_indexes.yml | 264 ++++++++++++++++++ .../database/duplicate_indexes_spec.rb | 108 +++++++ 4 files changed, 492 insertions(+) create mode 100644 spec/support/helpers/database/duplicate_indexes.rb create mode 100644 spec/support/helpers/database/duplicate_indexes.yml create mode 100644 spec/support_specs/database/duplicate_indexes_spec.rb diff --git a/spec/db/schema_spec.rb b/spec/db/schema_spec.rb index a7c00d2be326..fbd9c4d32cca 100644 --- a/spec/db/schema_spec.rb +++ b/spec/db/schema_spec.rb @@ -187,6 +187,44 @@ expect(ignored_columns).to match_array(ignored_columns - foreign_keys) end end + + context 'btree indexes' do + it 'only has existing indexes in the ignored duplicate indexes duplicate_indexes.yml' do + table_ignored_indexes = (ignored_indexes[table] || {}).to_a.flatten.uniq + indexes_by_name = indexes.map(&:name) + expect(indexes_by_name).to include(*table_ignored_indexes) + end + + it 'does not have any duplicated indexes' do + duplicate_indexes = Database::DuplicateIndexes.new(table, indexes).duplicate_indexes + expect(duplicate_indexes).to be_an_instance_of Hash + + table_ignored_indexes = ignored_indexes[table] || {} + + # We ignore all the indexes that are explicitly ignored in duplicate_indexes.yml + duplicate_indexes.each do |index, matching_indexes| + duplicate_indexes[index] = matching_indexes.reject do |matching_index| + table_ignored_indexes.fetch(index.name, []).include?(matching_index.name) || + table_ignored_indexes.fetch(matching_index.name, []).include?(index.name) + end + + duplicate_indexes.delete(index) if duplicate_indexes[index].empty? + end + + if duplicate_indexes.present? + btree_index = duplicate_indexes.each_key.first + matching_indexes = duplicate_indexes[btree_index] + + error_message = <<~ERROR + Duplicate index: #{btree_index.name} with #{matching_indexes.map(&:name)} + #{btree_index.name} : #{btree_index.columns.inspect} + #{matching_indexes.first.name} : #{matching_indexes.first.columns.inspect}. + Consider dropping the indexes #{matching_indexes.map(&:name).join(', ')} + ERROR + raise error_message + end + end + end end end end @@ -409,4 +447,9 @@ def ignored_limit_enums(model) def ignored_jsonb_columns(model) IGNORED_JSONB_COLUMNS.fetch(model, []) end + + def ignored_indexes + duplicate_indexes_file_path = "spec/support/helpers/database/duplicate_indexes.yml" + @ignored_indexes ||= YAML.load_file(Rails.root.join(duplicate_indexes_file_path)) || {} + end end diff --git a/spec/support/helpers/database/duplicate_indexes.rb b/spec/support/helpers/database/duplicate_indexes.rb new file mode 100644 index 000000000000..0ad8ee1e0554 --- /dev/null +++ b/spec/support/helpers/database/duplicate_indexes.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +module Database + class DuplicateIndexes + attr_accessor :table_name, :indexes + + BTREE_INDEX_STRUCT = Struct.new(:name, :columns, :unique) + + def initialize(table_name, indexes) + @table_name = table_name + @indexes = indexes + end + + def duplicate_indexes + ret = {} + + btree_indexes.each do |btree_index| + matching_indexes = matching_indexes_for(btree_index) + next unless matching_indexes.any? + + ret[btree_index] = matching_indexes + end + + ret + end + + def self.btree_index_struct(index) + BTREE_INDEX_STRUCT.new( + index.name, + Array.wrap(index.columns).map do |column| + # https://apidock.com/rails/ActiveRecord/ConnectionAdapters/PostgreSQL/SchemaStatements/indexes + # asc is the default order + column_order = index.orders.is_a?(Symbol) ? index.orders : (index.orders[column] || :asc) + { name: column, order: column_order } + end, + index.unique + ) + end + + private + + def btree_indexes + return @btree_indexes if @btree_indexes + + # We only scan non-conditional btree indexes + @btree_indexes = indexes.select do |index| + index.using == :btree && index.where.nil? && index.opclasses.blank? + end + + @btree_indexes = @btree_indexes.map { |index| self.class.btree_index_struct(index) } + end + + def matching_indexes_for(btree_index) + all_matching_indexes = [] + + # When comparing btree_index with other_index. btree_index is the index that can have more columns + # than the other_index. + (1..btree_index.columns.length).each do |subset_length| + columns = btree_index.columns.first(subset_length) + matching_indexes = btree_indexes.reject { |other_index| other_index == btree_index }.select do |other_index| + other_index.columns == columns + end + + # For now we ignore other indexes that are UNIQUE and have a matching columns subset of + # the btree_index columns, as UNIQUE indexes are still needed to enforce uniqueness + # constraints on subset of the columns. + matching_indexes = matching_indexes.reject do |other_index| + (other_index.unique && (other_index.columns.length < btree_index.columns.length)) + end + + all_matching_indexes += matching_indexes + end + + all_matching_indexes + end + end +end diff --git a/spec/support/helpers/database/duplicate_indexes.yml b/spec/support/helpers/database/duplicate_indexes.yml new file mode 100644 index 000000000000..c90cfa62ae0e --- /dev/null +++ b/spec/support/helpers/database/duplicate_indexes.yml @@ -0,0 +1,264 @@ +--- +# It maps table_name to {index1: array_of_duplicate_indexes, index2: array_of_duplicate_indexes, ... } +abuse_reports: + idx_abuse_reports_user_id_status_and_category: + - index_abuse_reports_on_user_id +alert_management_http_integrations: + index_http_integrations_on_project_and_endpoint: + - index_alert_management_http_integrations_on_project_id +analytics_cycle_analytics_group_stages: + index_group_stages_on_group_id_group_value_stream_id_and_name: + - index_analytics_ca_group_stages_on_group_id +approval_project_rules_users: + index_approval_project_rules_users_1: + - index_approval_project_rules_users_on_approval_project_rule_id +approvals: + index_approvals_on_merge_request_id_and_created_at: + - index_approvals_on_merge_request_id +board_group_recent_visits: + index_board_group_recent_visits_on_user_group_and_board: + - index_board_group_recent_visits_on_user_id +board_project_recent_visits: + index_board_project_recent_visits_on_user_project_and_board: + - index_board_project_recent_visits_on_user_id +board_user_preferences: + index_board_user_preferences_on_user_id_and_board_id: + - index_board_user_preferences_on_user_id +boards_epic_board_recent_visits: + index_epic_board_recent_visits_on_user_group_and_board: + - index_boards_epic_board_recent_visits_on_user_id +boards_epic_user_preferences: + index_boards_epic_user_preferences_on_board_user_epic_unique: + - index_boards_epic_user_preferences_on_board_id +bulk_import_batch_trackers: + i_bulk_import_trackers_id_batch_number: + - index_bulk_import_batch_trackers_on_tracker_id +bulk_import_export_batches: + i_bulk_import_export_batches_id_batch_number: + - index_bulk_import_export_batches_on_export_id +ci_job_artifacts: + index_ci_job_artifacts_on_id_project_id_and_created_at: + - index_ci_job_artifacts_on_project_id + index_ci_job_artifacts_on_id_project_id_and_file_type: + - index_ci_job_artifacts_on_project_id + index_ci_job_artifacts_on_project_id_and_id: + - index_ci_job_artifacts_on_project_id +ci_pipeline_artifacts: + index_ci_pipeline_artifacts_on_pipeline_id_and_file_type: + - index_ci_pipeline_artifacts_on_pipeline_id +ci_stages: + index_ci_stages_on_pipeline_id_and_name: + - index_ci_stages_on_pipeline_id + index_ci_stages_on_pipeline_id_and_position: + - index_ci_stages_on_pipeline_id +dast_site_tokens: + index_dast_site_token_on_project_id_and_url: + - index_dast_site_tokens_on_project_id +design_management_designs: + index_design_management_designs_on_iid_and_project_id: + - index_design_management_designs_on_project_id +design_management_designs_versions: + design_management_designs_versions_uniqueness: + - index_design_management_designs_versions_on_design_id +error_tracking_errors: + index_et_errors_on_project_id_and_status_and_id: + - index_error_tracking_errors_on_project_id + index_et_errors_on_project_id_and_status_events_count_id_desc: + - index_error_tracking_errors_on_project_id + index_et_errors_on_project_id_and_status_first_seen_at_id_desc: + - index_error_tracking_errors_on_project_id + index_et_errors_on_project_id_and_status_last_seen_at_id_desc: + - index_error_tracking_errors_on_project_id +geo_node_namespace_links: + index_geo_node_namespace_links_on_geo_node_id_and_namespace_id: + - index_geo_node_namespace_links_on_geo_node_id +in_product_marketing_emails: + index_in_product_marketing_emails_on_user_campaign: + - index_in_product_marketing_emails_on_user_id + index_in_product_marketing_emails_on_user_track_series: + - index_in_product_marketing_emails_on_user_id +incident_management_oncall_participants: + index_inc_mgmnt_oncall_participants_on_user_id_and_rotation_id: + - index_inc_mgmnt_oncall_participants_on_oncall_user_id +incident_management_oncall_schedules: + index_im_oncall_schedules_on_project_id_and_iid: + - index_incident_management_oncall_schedules_on_project_id +instance_audit_events_streaming_headers: + idx_instance_external_audit_event_destination_id_key_uniq: + - idx_headers_instance_external_audit_event_destination_id +issue_links: + index_issue_links_on_source_id_and_target_id: + - index_issue_links_on_source_id +issues: + index_issues_on_author_id_and_id_and_created_at: + - index_issues_on_author_id +jira_connect_subscriptions: + idx_jira_connect_subscriptions_on_installation_id_namespace_id: + - idx_jira_connect_subscriptions_on_installation_id +list_user_preferences: + index_list_user_preferences_on_user_id_and_list_id: + - index_list_user_preferences_on_user_id +member_tasks: + index_member_tasks_on_member_id_and_project_id: + - index_member_tasks_on_member_id +members: + index_members_on_member_namespace_id_compound: + - index_members_on_member_namespace_id +merge_request_assignees: + index_merge_request_assignees_on_merge_request_id_and_user_id: + - index_merge_request_assignees_on_merge_request_id +merge_request_metrics: + index_mr_metrics_on_target_project_id_merged_at_nulls_last: + - index_merge_request_metrics_on_target_project_id +merge_requests: + index_merge_requests_on_author_id_and_created_at: + - index_merge_requests_on_author_id + index_merge_requests_on_author_id_and_id: + - index_merge_requests_on_author_id + index_merge_requests_on_author_id_and_target_project_id: + - index_merge_requests_on_author_id +ml_candidate_params: + index_ml_candidate_params_on_candidate_id_on_name: + - index_ml_candidate_params_on_candidate_id +ml_candidates: + index_ml_candidates_on_project_id_on_internal_id: + - index_ml_candidates_on_project_id +ml_model_versions: + index_ml_model_versions_on_project_id_and_model_id_and_version: + - index_ml_model_versions_on_project_id +ml_models: + index_ml_models_on_project_id_and_name: + - index_ml_models_on_project_id +p_ci_runner_machine_builds: + index_p_ci_runner_machine_builds_on_runner_machine_id: + - index_ci_runner_machine_builds_on_runner_machine_id +packages_debian_group_distributions: + uniq_pkgs_debian_group_distributions_group_id_and_codename: + - index_packages_debian_group_distributions_on_group_id + uniq_pkgs_debian_group_distributions_group_id_and_suite: + - index_packages_debian_group_distributions_on_group_id +packages_debian_project_distributions: + uniq_pkgs_debian_project_distributions_project_id_and_codename: + - index_packages_debian_project_distributions_on_project_id + uniq_pkgs_debian_project_distributions_project_id_and_suite: + - index_packages_debian_project_distributions_on_project_id +packages_tags: + index_packages_tags_on_package_id_and_updated_at: + - index_packages_tags_on_package_id +pages_deployments: + index_pages_deployments_unique_path_prefix_by_project: + - index_pages_deployments_on_project_id +pages_domains: + index_pages_domains_on_project_id_and_enabled_until: + - index_pages_domains_on_project_id + index_pages_domains_on_verified_at_and_enabled_until: + - index_pages_domains_on_verified_at +personal_access_tokens: + index_pat_on_user_id_and_expires_at: + - index_personal_access_tokens_on_user_id +pm_affected_packages: + i_affected_packages_unique_for_upsert: + - index_pm_affected_packages_on_pm_advisory_id +pm_package_version_licenses: + i_pm_package_version_licenses_join_ids: + - index_pm_package_version_licenses_on_pm_package_version_id +pm_package_versions: + i_pm_package_versions_on_package_id_and_version: + - index_pm_package_versions_on_pm_package_id +project_compliance_standards_adherence: + u_project_compliance_standards_adherence_for_reporting: + - index_project_compliance_standards_adherence_on_project_id +project_relation_exports: + index_project_export_job_relation: + - index_project_relation_exports_on_project_export_job_id +project_repositories: + index_project_repositories_on_shard_id_and_project_id: + - index_project_repositories_on_shard_id +project_topics: + index_project_topics_on_project_id_and_topic_id: + - index_project_topics_on_project_id +projects: + index_projects_api_path_id_desc: + - index_on_projects_path + index_projects_on_path_and_id: + - index_on_projects_path +protected_environments: + index_protected_environments_on_project_id_and_name: + - index_protected_environments_on_project_id +protected_tags: + index_protected_tags_on_project_id_and_name: + - index_protected_tags_on_project_id +related_epic_links: + index_related_epic_links_on_source_id_and_target_id: + - index_related_epic_links_on_source_id +requirements_management_test_reports: + idx_test_reports_on_issue_id_created_at_and_id: + - index_requirements_management_test_reports_on_issue_id +sbom_component_versions: + index_sbom_component_versions_on_component_id_and_version: + - index_sbom_component_versions_on_component_id +sbom_occurrences: + index_sbom_occurrences_for_input_file_path_search: + - index_sbom_occurrences_on_project_id_component_id + - index_sbom_occurrences_on_project_id + idx_sbom_occurrences_on_project_id_and_source_id: + - index_sbom_occurrences_on_project_id + index_sbom_occurrences_on_project_id_and_id: + - index_sbom_occurrences_on_project_id + index_sbom_occurrences_on_project_id_component_id: + - index_sbom_occurrences_on_project_id + index_sbom_occurrences_on_project_id_and_component_id_and_id: + - index_sbom_occurrences_on_project_id_component_id + - index_sbom_occurrences_on_project_id + index_sbom_occurrences_on_project_id_and_package_manager: + - index_sbom_occurrences_on_project_id +scan_result_policies: + index_scan_result_policies_on_position_in_configuration: + - index_scan_result_policies_on_policy_configuration_id +search_namespace_index_assignments: + index_search_namespace_index_assignments_uniqueness_index_type: + - index_search_namespace_index_assignments_on_namespace_id + index_search_namespace_index_assignments_uniqueness_on_index_id: + - index_search_namespace_index_assignments_on_namespace_id +sprints: + sequence_is_unique_per_iterations_cadence_id: + - index_sprints_iterations_cadence_id +taggings: + taggings_idx: + - index_taggings_on_tag_id +term_agreements: + term_agreements_unique_index: + - index_term_agreements_on_user_id +todos: + index_todos_on_author_id_and_created_at: + - index_todos_on_author_id +user_callouts: + index_user_callouts_on_user_id_and_feature_name: + - index_user_callouts_on_user_id +users: + index_users_on_state_and_user_type: + - index_users_on_state +vulnerabilities: + index_vulnerabilities_project_id_state_severity_default_branch: + - index_vulnerabilities_on_project_id_and_state_and_severity +vulnerability_external_issue_links: + idx_vulnerability_ext_issue_links_on_vulne_id_and_ext_issue: + - index_vulnerability_external_issue_links_on_vulnerability_id +vulnerability_finding_links: + finding_link_name_url_idx: + - finding_links_on_vulnerability_occurrence_id +vulnerability_finding_signatures: + idx_vuln_signatures_uniqueness_signature_sha: + - index_vulnerability_finding_signatures_on_finding_id +vulnerability_flags: + index_vulnerability_flags_on_unique_columns: + - index_vulnerability_flags_on_vulnerability_occurrence_id +web_hook_logs: + index_web_hook_logs_on_web_hook_id_and_created_at: + - index_web_hook_logs_part_on_web_hook_id +web_hooks: + index_web_hooks_on_project_id_recent_failures: + - index_web_hooks_on_project_id +work_item_hierarchy_restrictions: + index_work_item_hierarchy_restrictions_on_parent_and_child: + - index_work_item_hierarchy_restrictions_on_parent_type_id diff --git a/spec/support_specs/database/duplicate_indexes_spec.rb b/spec/support_specs/database/duplicate_indexes_spec.rb new file mode 100644 index 000000000000..bbcb80f0593d --- /dev/null +++ b/spec/support_specs/database/duplicate_indexes_spec.rb @@ -0,0 +1,108 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Database::DuplicateIndexes, feature_category: :database do + index_class = ActiveRecord::ConnectionAdapters::IndexDefinition + let(:default_index_options) { { using: :btree, orders: {}, unique: false, opclasses: {}, where: nil } } + + let(:table_name) { 'foobar' } + let(:index1) { instance_double(index_class, default_index_options.merge(name: 'index1', columns: %w[user_id])) } + let(:index1_copy) { instance_double(index_class, default_index_options.merge(name: 'index1b', columns: %w[user_id])) } + let(:index2) { instance_double(index_class, default_index_options.merge(name: 'index2', columns: %w[project_id])) } + let(:index3) do + instance_double(index_class, default_index_options.merge(name: 'index3', columns: %w[user_id project_id])) + end + + let(:index3_inverse) do + instance_double(index_class, default_index_options.merge(name: 'index3_inverse', columns: %w[project_id user_id])) + end + + let(:index1_unique) do + instance_double(index_class, default_index_options.merge(name: 'index1_unique', columns: %w[user_id], unique: true)) + end + + let(:index1_desc) do + instance_double( + index_class, + default_index_options.merge(name: 'index1', columns: %w[user_id], orders: { user_id: 'desc' }) + ) + end + + let(:index3_with_where) do + instance_double( + index_class, + default_index_options.merge(name: 'index3_with_where', columns: %w[user_id project_id], where: "id > 100") + ) + end + + subject(:duplicate_indexes) do + described_class.new(table_name, indexes).duplicate_indexes + end + + context 'when there are no duplicate indexes' do + let(:indexes) { [index1, index2] } + + it { expect(duplicate_indexes).to be_empty } + end + + context 'when overlapping indexes' do + let(:indexes) { [index1, index3] } + + it 'detects a duplicate index between index1 and index3' do + expected_duplicate_indexes = { index_struct(index3) => [index_struct(index1)] } + + expect(duplicate_indexes).to eq(expected_duplicate_indexes) + end + end + + context 'when the indexes have the inverse order of columns' do + let(:indexes) { [index3, index3_inverse] } + + it 'does not detect duplicate indexes between index3 and index3_inverse' do + expect(duplicate_indexes).to eq({}) + end + end + + # For now we ignore other indexes that are UNIQUE and have a matching columns subset of + # the btree_index columns, as UNIQUE indexes are still needed to enforce uniqueness + # constraints on subset of the columns. + context 'when the index with matching sub-columns is unique' do + let(:indexes) { [index3, index1_unique] } + + it 'does not detect duplicate indexes between index3 and index1_unique' do + expect(duplicate_indexes).to eq({}) + end + end + + context 'when the one of the indexes is a conditional index' do + let(:indexes) { [index3, index3_with_where] } + + it 'does not detect duplicate indexes between index3 and index3_with_where' do + expect(duplicate_indexes).to eq({}) + end + end + + context 'when identical indexes' do + let(:indexes) { [index1, index1_copy] } + + it 'detects a duplicate index between index1 and index3' do + expected_duplicate_indexes = { + index_struct(index1) => [index_struct(index1_copy)], + index_struct(index1_copy) => [index_struct(index1)] + } + + expect(duplicate_indexes).to eq(expected_duplicate_indexes) + end + end + + context 'when indexes have the same columns but with different order' do + let(:indexes) { [index1, index1_desc] } + + it { expect(duplicate_indexes).to be_empty } + end + + def index_struct(index) + Database::DuplicateIndexes.btree_index_struct(index) + end +end -- GitLab