From 0c2bcbd3db1b2c7cc47330e74aac2dac6dadca4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= <ayufan@ayufan.eu>
Date: Fri, 5 Nov 2021 12:28:28 +0100
Subject: [PATCH] Add `QueryAnalyzer` to have a single approach to hook all
 analyzers

This adds a generic and central place to parse and normalize queries
via `pg_query`. Makes it easy to write new analyzers and hook them depending
on other conditions.
---
 .../initializers/database_query_analyzers.rb  |  4 ++
 .../database/load_balancing/configuration.rb  |  4 ++
 lib/gitlab/database/query_analyzer.rb         | 60 ++++++++++++++++
 lib/gitlab/database/query_analyzers/base.rb   | 17 +++++
 .../gitlab/database/query_analyzer_spec.rb    | 72 +++++++++++++++++++
 5 files changed, 157 insertions(+)
 create mode 100644 config/initializers/database_query_analyzers.rb
 create mode 100644 lib/gitlab/database/query_analyzer.rb
 create mode 100644 lib/gitlab/database/query_analyzers/base.rb
 create mode 100644 spec/lib/gitlab/database/query_analyzer_spec.rb

diff --git a/config/initializers/database_query_analyzers.rb b/config/initializers/database_query_analyzers.rb
new file mode 100644
index 0000000000000..a29cd6552ffd1
--- /dev/null
+++ b/config/initializers/database_query_analyzers.rb
@@ -0,0 +1,4 @@
+# frozen_string_literal: true
+
+# Currently we register validator only for `dev` or `test` environment
+Gitlab::Database::QueryAnalyzer.new.hook! if Gitlab.dev_or_test_env?
diff --git a/lib/gitlab/database/load_balancing/configuration.rb b/lib/gitlab/database/load_balancing/configuration.rb
index 7814083a5271f..da313361073cb 100644
--- a/lib/gitlab/database/load_balancing/configuration.rb
+++ b/lib/gitlab/database/load_balancing/configuration.rb
@@ -77,6 +77,10 @@ def primary_connection_specification_name
           (@primary_model || @model).connection_specification_name
         end
 
+        def primary_db_config
+          (@primary_model || @model).connection_db_config
+        end
+
         def replica_db_config
           @model.connection_db_config
         end
diff --git a/lib/gitlab/database/query_analyzer.rb b/lib/gitlab/database/query_analyzer.rb
new file mode 100644
index 0000000000000..830ad1383c047
--- /dev/null
+++ b/lib/gitlab/database/query_analyzer.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    # The purpose of this class is to implement a various query analyzers based on `pg_query`
+    # And process them all via `Gitlab::Database::QueryAnalyzers::*`
+    class QueryAnalyzer
+      ANALYZERS = [].freeze
+
+      Parsed = Struct.new(
+        :sql, :connection, :pg
+      )
+
+      def hook!
+        @subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |event|
+          process_sql(event.payload[:sql], event.payload[:connection])
+        end
+      end
+
+      private
+
+      def process_sql(sql, connection)
+        analyzers = enabled_analyzers(connection)
+        return unless analyzers.any?
+
+        parsed = parse(sql, connection)
+        return unless parsed
+
+        analyzers.each do |analyzer|
+          analyzer.analyze(parsed)
+        rescue => e # rubocop:disable Style/RescueStandardError
+          # We catch all standard errors to prevent validation errors to introduce fatal errors in production
+          Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
+        end
+      end
+
+      def enabled_analyzers(connection)
+        ANALYZERS.select do |analyzer|
+          analyzer.enabled?(connection)
+        rescue StandardError => e # rubocop:disable Style/RescueStandardError
+          # We catch all standard errors to prevent validation errors to introduce fatal errors in production
+          Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
+        end
+      end
+
+      def parse(sql, connection)
+        parsed = PgQuery.parse(sql)
+        return unless parsed
+
+        normalized = PgQuery.normalize(sql)
+        Parsed.new(normalized, connection, parsed)
+      rescue PgQuery::ParseError => e
+        # Ignore PgQuery parse errors (due to depth limit or other reasons)
+        Gitlab::ErrorTracking.track_exception(e)
+
+        nil
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/database/query_analyzers/base.rb b/lib/gitlab/database/query_analyzers/base.rb
new file mode 100644
index 0000000000000..e1be889c803ea
--- /dev/null
+++ b/lib/gitlab/database/query_analyzers/base.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    module QueryAnalyzers
+      class Base
+        def self.enabled?(connection)
+          raise NotImplementedError
+        end
+
+        def self.analyze(parsed)
+          raise NotImplementedError
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/database/query_analyzer_spec.rb b/spec/lib/gitlab/database/query_analyzer_spec.rb
new file mode 100644
index 0000000000000..b8356522e55ca
--- /dev/null
+++ b/spec/lib/gitlab/database/query_analyzer_spec.rb
@@ -0,0 +1,72 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::QueryAnalyzer do
+  let(:analyzer) { double(:query_analyzer) }
+
+  before do
+    stub_const('Gitlab::Database::QueryAnalyzer::ANALYZERS', [analyzer])
+  end
+
+  context 'the hook is enabled by default in specs' do
+    it 'does process queries and gets normalized SQL' do
+      expect(analyzer).to receive(:enabled?).and_return(true)
+      expect(analyzer).to receive(:analyze) do |parsed|
+        expect(parsed.sql).to include("SELECT $1 FROM projects")
+        expect(parsed.pg.tables).to eq(%w[projects])
+      end
+
+      Project.connection.execute("SELECT 1 FROM projects")
+    end
+  end
+
+  describe '#process_sql' do
+    it 'does not analyze query if not enabled' do
+      expect(analyzer).to receive(:enabled?).and_return(false)
+      expect(analyzer).not_to receive(:analyze)
+
+      process_sql("SELECT 1 FROM projects")
+    end
+
+    it 'does analyze query if enabled' do
+      expect(analyzer).to receive(:enabled?).and_return(true)
+      expect(analyzer).to receive(:analyze) do |parsed|
+        expect(parsed.sql).to eq("SELECT $1 FROM projects")
+        expect(parsed.pg.tables).to eq(%w[projects])
+      end
+
+      process_sql("SELECT 1 FROM projects")
+    end
+
+    it 'does track exception if query cannot be parsed' do
+      expect(analyzer).to receive(:enabled?).and_return(true)
+      expect(analyzer).not_to receive(:analyze)
+      expect(Gitlab::ErrorTracking).to receive(:track_exception)
+
+      expect { process_sql("invalid query") }.not_to raise_error
+    end
+
+    it 'does track exception if analyzer raises exception on enabled?' do
+      expect(analyzer).to receive(:enabled?).and_raise('exception')
+      expect(analyzer).not_to receive(:analyze)
+      expect(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception)
+
+      expect { process_sql("SELECT 1 FROM projects") }.not_to raise_error
+    end
+
+    it 'does track exception if analyzer raises exception on analyze' do
+      expect(analyzer).to receive(:enabled?).and_return(true)
+      expect(analyzer).to receive(:analyze).and_raise('exception')
+      expect(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception)
+
+      expect { process_sql("SELECT 1 FROM projects") }.not_to raise_error
+    end
+
+    def process_sql(sql)
+      ApplicationRecord.connection.load_balancer.read_write do |connection|
+        described_class.new.send(:process_sql, sql, connection)
+      end
+    end
+  end
+end
-- 
GitLab