From 053df6976b4f119dc9eca9ca2260b5481ccaf6c3 Mon Sep 17 00:00:00 2001
From: Jon Jenkins <jjenkins@gitlab.com>
Date: Wed, 26 Feb 2025 17:35:55 +0000
Subject: [PATCH] Add JSONB scan detector

Adds the necessary script to detect a WHERE or JOIN on a JSONB column.
Please see linked issue for more info.
---
 scripts/database/query_analyzers.rb           |  2 +-
 scripts/database/query_analyzers.yml          |  2 +
 scripts/database/query_analyzers/base.rb      |  2 +-
 .../query_analyzers/jsonb_scan_detector.rb    | 46 ++++++++++++++++
 .../multiple_partition_scan_detector.rb       |  2 +-
 .../jsonb_scan_detector_spec.rb               | 54 +++++++++++++++++++
 6 files changed, 105 insertions(+), 3 deletions(-)
 create mode 100644 scripts/database/query_analyzers/jsonb_scan_detector.rb
 create mode 100644 spec/scripts/database/query_analyzers/jsonb_scan_detector_spec.rb

diff --git a/scripts/database/query_analyzers.rb b/scripts/database/query_analyzers.rb
index 89eae5e6f2cec..012cba03120bf 100644
--- a/scripts/database/query_analyzers.rb
+++ b/scripts/database/query_analyzers.rb
@@ -2,7 +2,7 @@
 
 require 'yaml'
 
-class Database
+module Database
   class QueryAnalyzers
     attr_reader :analyzers
 
diff --git a/scripts/database/query_analyzers.yml b/scripts/database/query_analyzers.yml
index 8f87d96500dff..e61e6f57f4758 100644
--- a/scripts/database/query_analyzers.yml
+++ b/scripts/database/query_analyzers.yml
@@ -9,3 +9,5 @@ MultiplePartitionScanDetector:
     # These fingerprints can be found in the auto_explain pipeline artifacts.
     # Example:
     # - c2cfe803a497101b
+JSONBScanDetector:
+  todos:
diff --git a/scripts/database/query_analyzers/base.rb b/scripts/database/query_analyzers/base.rb
index e56a61b3c396c..4cbb089e856ac 100644
--- a/scripts/database/query_analyzers/base.rb
+++ b/scripts/database/query_analyzers/base.rb
@@ -3,7 +3,7 @@
 require 'json'
 require 'zlib'
 
-class Database
+module Database
   class QueryAnalyzers
     class Base
       attr_accessor :output
diff --git a/scripts/database/query_analyzers/jsonb_scan_detector.rb b/scripts/database/query_analyzers/jsonb_scan_detector.rb
new file mode 100644
index 0000000000000..b94168334eb48
--- /dev/null
+++ b/scripts/database/query_analyzers/jsonb_scan_detector.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+require_relative 'base'
+
+module Database
+  class QueryAnalyzers
+    class JSONBScanDetector < Database::QueryAnalyzers::Base
+      JSONB_MATCH_OPERATOR_EXPRESSION = /<@|@>/
+
+      def initialize(*args)
+        super
+        output[:bad_queries] = []
+      end
+
+      def analyze(query)
+        super
+        return if config['todos']&.include?(query['fingerprint'])
+
+        output[:bad_queries] << query if has_operator_in_where?(query['query'])
+      end
+
+      def save!
+        return if output[:bad_queries].empty?
+
+        Zlib::GzipWriter.open(output_path("jsonb_column_scans.ndjson")) do |file|
+          output[:bad_queries].each do |query|
+            file.puts(JSON.generate(query))
+          end
+        end
+      end
+
+      private
+
+      def has_operator_in_where?(query)
+        return false unless query.match?(JSONB_MATCH_OPERATOR_EXPRESSION)
+
+        clauses = query.split(/\sWHERE\s|\sJOIN\s/)
+        return false if clauses.length < 2
+
+        clauses[1..].each do |c|
+          return true if c.include?('::jsonb')
+        end
+      end
+    end
+  end
+end
diff --git a/scripts/database/query_analyzers/multiple_partition_scan_detector.rb b/scripts/database/query_analyzers/multiple_partition_scan_detector.rb
index 1a1415dd8f228..b2e364be4bd17 100644
--- a/scripts/database/query_analyzers/multiple_partition_scan_detector.rb
+++ b/scripts/database/query_analyzers/multiple_partition_scan_detector.rb
@@ -2,7 +2,7 @@
 
 require_relative 'base'
 
-class Database
+module Database
   class QueryAnalyzers
     class MultiplePartitionScanDetector < Database::QueryAnalyzers::Base
       def analyze(query)
diff --git a/spec/scripts/database/query_analyzers/jsonb_scan_detector_spec.rb b/spec/scripts/database/query_analyzers/jsonb_scan_detector_spec.rb
new file mode 100644
index 0000000000000..78d4405e19876
--- /dev/null
+++ b/spec/scripts/database/query_analyzers/jsonb_scan_detector_spec.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+require_relative '../../../../scripts/database/query_analyzers/jsonb_scan_detector'
+
+RSpec.describe Database::QueryAnalyzers::JSONBScanDetector, feature_category: :database do
+  let(:invalid_query_string) do
+    <<~SQL
+      SELECT
+          *
+      FROM
+          member_roles
+      WHERE
+          member_roles.permissions @> ('{"admin_merge_request":true}')::jsonb
+    SQL
+  end
+
+  let(:invalid_query) { { 'query' => invalid_query_string, 'fingerprint' => '0000000000000001' } }
+
+  let(:valid_query_string) { "SELECT * FROM users WHERE name = 'bob'" }
+
+  let(:valid_query) { { 'query' => valid_query_string, 'fingerprint' => '0000000000000002' } }
+
+  let(:config) { {} }
+
+  subject(:analyzer) { described_class.new(config) }
+
+  it "initalizes" do
+    expect { analyzer }.not_to raise_error
+  end
+
+  context 'when no TODOs are defined' do
+    it 'finds the invalid query' do
+      [valid_query, invalid_query].each { |q| analyzer.analyze q }
+      expect(analyzer.output[:bad_queries].length).to eq 1
+    end
+  end
+
+  context 'when a TODO is defined' do
+    let(:config) do
+      {
+        "todos" => [
+          invalid_query['fingerprint']
+        ]
+      }
+    end
+
+    it 'does not find the invalid query' do
+      [valid_query, invalid_query].each { |q| analyzer.analyze q }
+      expect(analyzer.output[:bad_queries].length).to eq 0
+    end
+  end
+end
-- 
GitLab