From 053df6976b4f119dc9eca9ca2260b5481ccaf6c3 Mon Sep 17 00:00:00 2001 From: Jon Jenkins <jjenkins@gitlab.com> Date: Wed, 26 Feb 2025 17:35:55 +0000 Subject: [PATCH] Add JSONB scan detector Adds the necessary script to detect a WHERE or JOIN on a JSONB column. Please see linked issue for more info. --- scripts/database/query_analyzers.rb | 2 +- scripts/database/query_analyzers.yml | 2 + scripts/database/query_analyzers/base.rb | 2 +- .../query_analyzers/jsonb_scan_detector.rb | 46 ++++++++++++++++ .../multiple_partition_scan_detector.rb | 2 +- .../jsonb_scan_detector_spec.rb | 54 +++++++++++++++++++ 6 files changed, 105 insertions(+), 3 deletions(-) create mode 100644 scripts/database/query_analyzers/jsonb_scan_detector.rb create mode 100644 spec/scripts/database/query_analyzers/jsonb_scan_detector_spec.rb diff --git a/scripts/database/query_analyzers.rb b/scripts/database/query_analyzers.rb index 89eae5e6f2cec..012cba03120bf 100644 --- a/scripts/database/query_analyzers.rb +++ b/scripts/database/query_analyzers.rb @@ -2,7 +2,7 @@ require 'yaml' -class Database +module Database class QueryAnalyzers attr_reader :analyzers diff --git a/scripts/database/query_analyzers.yml b/scripts/database/query_analyzers.yml index 8f87d96500dff..e61e6f57f4758 100644 --- a/scripts/database/query_analyzers.yml +++ b/scripts/database/query_analyzers.yml @@ -9,3 +9,5 @@ MultiplePartitionScanDetector: # These fingerprints can be found in the auto_explain pipeline artifacts. # Example: # - c2cfe803a497101b +JSONBScanDetector: + todos: diff --git a/scripts/database/query_analyzers/base.rb b/scripts/database/query_analyzers/base.rb index e56a61b3c396c..4cbb089e856ac 100644 --- a/scripts/database/query_analyzers/base.rb +++ b/scripts/database/query_analyzers/base.rb @@ -3,7 +3,7 @@ require 'json' require 'zlib' -class Database +module Database class QueryAnalyzers class Base attr_accessor :output diff --git a/scripts/database/query_analyzers/jsonb_scan_detector.rb b/scripts/database/query_analyzers/jsonb_scan_detector.rb new file mode 100644 index 0000000000000..b94168334eb48 --- /dev/null +++ b/scripts/database/query_analyzers/jsonb_scan_detector.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +require_relative 'base' + +module Database + class QueryAnalyzers + class JSONBScanDetector < Database::QueryAnalyzers::Base + JSONB_MATCH_OPERATOR_EXPRESSION = /<@|@>/ + + def initialize(*args) + super + output[:bad_queries] = [] + end + + def analyze(query) + super + return if config['todos']&.include?(query['fingerprint']) + + output[:bad_queries] << query if has_operator_in_where?(query['query']) + end + + def save! + return if output[:bad_queries].empty? + + Zlib::GzipWriter.open(output_path("jsonb_column_scans.ndjson")) do |file| + output[:bad_queries].each do |query| + file.puts(JSON.generate(query)) + end + end + end + + private + + def has_operator_in_where?(query) + return false unless query.match?(JSONB_MATCH_OPERATOR_EXPRESSION) + + clauses = query.split(/\sWHERE\s|\sJOIN\s/) + return false if clauses.length < 2 + + clauses[1..].each do |c| + return true if c.include?('::jsonb') + end + end + end + end +end diff --git a/scripts/database/query_analyzers/multiple_partition_scan_detector.rb b/scripts/database/query_analyzers/multiple_partition_scan_detector.rb index 1a1415dd8f228..b2e364be4bd17 100644 --- a/scripts/database/query_analyzers/multiple_partition_scan_detector.rb +++ b/scripts/database/query_analyzers/multiple_partition_scan_detector.rb @@ -2,7 +2,7 @@ require_relative 'base' -class Database +module Database class QueryAnalyzers class MultiplePartitionScanDetector < Database::QueryAnalyzers::Base def analyze(query) diff --git a/spec/scripts/database/query_analyzers/jsonb_scan_detector_spec.rb b/spec/scripts/database/query_analyzers/jsonb_scan_detector_spec.rb new file mode 100644 index 0000000000000..78d4405e19876 --- /dev/null +++ b/spec/scripts/database/query_analyzers/jsonb_scan_detector_spec.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +require 'fast_spec_helper' + +require_relative '../../../../scripts/database/query_analyzers/jsonb_scan_detector' + +RSpec.describe Database::QueryAnalyzers::JSONBScanDetector, feature_category: :database do + let(:invalid_query_string) do + <<~SQL + SELECT + * + FROM + member_roles + WHERE + member_roles.permissions @> ('{"admin_merge_request":true}')::jsonb + SQL + end + + let(:invalid_query) { { 'query' => invalid_query_string, 'fingerprint' => '0000000000000001' } } + + let(:valid_query_string) { "SELECT * FROM users WHERE name = 'bob'" } + + let(:valid_query) { { 'query' => valid_query_string, 'fingerprint' => '0000000000000002' } } + + let(:config) { {} } + + subject(:analyzer) { described_class.new(config) } + + it "initalizes" do + expect { analyzer }.not_to raise_error + end + + context 'when no TODOs are defined' do + it 'finds the invalid query' do + [valid_query, invalid_query].each { |q| analyzer.analyze q } + expect(analyzer.output[:bad_queries].length).to eq 1 + end + end + + context 'when a TODO is defined' do + let(:config) do + { + "todos" => [ + invalid_query['fingerprint'] + ] + } + end + + it 'does not find the invalid query' do + [valid_query, invalid_query].each { |q| analyzer.analyze q } + expect(analyzer.output[:bad_queries].length).to eq 0 + end + end +end -- GitLab