From 10c91bc92e46e3017c7084b3d50b147d8dd28e20 Mon Sep 17 00:00:00 2001
From: Dmitry Gruzd <dgruzd@gitlab.com>
Date: Fri, 24 Jan 2025 11:39:04 +0000
Subject: [PATCH] Add production-ready Elasticsearch query processor

Implement a comprehensive Elasticsearch query processor that transforms
ActiveContext::Query AST nodes into Elasticsearch DSL queries. This
implementation provides proper handling of all query types and follows
best practices for query composition.

Key changes:
- Add Processor class with support for filter, prefix, knn, limit, and boolean (and/or) queries
- Implement helper methods for consistent query building
- Add comprehensive test coverage
- Add YARD documentation for all methods

Details:
* Processor changes:
  - Implement transform method following concern contract
  - Add build_bool_query helper for consistent query construction
  - Add proper handling of KNN queries with other conditions
  - Implement proper query composition for nested structures

* Test improvements:
  - Add comprehensive test suite with all query types
  - Add test cases for complex nested queries
  - Add edge case coverage
  - Use shared examples for common query patterns

* Documentation:
  - Add YARD documentation for all public methods
  - Add usage examples
  - Document query composition patterns
  - Add type specifications for parameters

This implementation provides a production-ready solution for transforming
query AST nodes into Elasticsearch queries, with proper handling of all
supported query types and edge cases.
---
 .../databases/concerns/processor.rb           |  39 ++
 .../databases/elasticsearch/client.rb         |   7 +-
 .../databases/elasticsearch/processor.rb      | 298 ++++++++++++++
 .../active_context/query/processor_example.rb |   4 +-
 .../databases/elasticsearch/client_spec.rb    |   5 +-
 .../databases/elasticsearch/processor_spec.rb | 383 ++++++++++++++++++
 .../gitlab-active-context/spec/spec_helper.rb |   2 +
 .../query_processor_examples.rb               |  32 ++
 8 files changed, 765 insertions(+), 5 deletions(-)
 create mode 100644 gems/gitlab-active-context/lib/active_context/databases/concerns/processor.rb
 create mode 100644 gems/gitlab-active-context/lib/active_context/databases/elasticsearch/processor.rb
 create mode 100644 gems/gitlab-active-context/spec/lib/active_context/databases/elasticsearch/processor_spec.rb
 create mode 100644 gems/gitlab-active-context/spec/support/shared_examples/query_processor_examples.rb

diff --git a/gems/gitlab-active-context/lib/active_context/databases/concerns/processor.rb b/gems/gitlab-active-context/lib/active_context/databases/concerns/processor.rb
new file mode 100644
index 0000000000000..d314106dffc9a
--- /dev/null
+++ b/gems/gitlab-active-context/lib/active_context/databases/concerns/processor.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module ActiveContext
+  module Databases
+    module Concerns
+      # Concern for processors that transform Query AST nodes into database-specific queries.
+      #
+      # @example Implementation
+      #   class MyProcessor
+      #     include ActiveContext::Databases::Concerns::Processor
+      #
+      #     def self.transform(node)
+      #       new.process(node)
+      #     end
+      #
+      #     def process(node)
+      #       # Transform the node into a database-specific query
+      #     end
+      #   end
+      module Processor
+        extend ActiveSupport::Concern
+
+        # Defined on the module itself rather than inside `included`, so an including class's
+        # own #process always takes precedence, wherever its `include` line appears.
+        # @abstract Implement #process in subclass to transform query nodes
+        def process(_node)
+          raise NotImplementedError, "#{self.class.name} must implement #process"
+        end
+
+        class_methods do
+          # @abstract Implement .transform in subclass to handle query transformation
+          def transform(_node)
+            raise NotImplementedError, "#{name} must implement .transform"
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/gems/gitlab-active-context/lib/active_context/databases/elasticsearch/client.rb b/gems/gitlab-active-context/lib/active_context/databases/elasticsearch/client.rb
index 39b4b8f3fbc92..42c0a8b094e4b 100644
--- a/gems/gitlab-active-context/lib/active_context/databases/elasticsearch/client.rb
+++ b/gems/gitlab-active-context/lib/active_context/databases/elasticsearch/client.rb
@@ -13,8 +13,11 @@ def initialize(options)
           @options = options
         end
 
-        def search(_query)
-          res = client.search
+        def search(collection:, query:) # raises ArgumentError unless query is an ActiveContext::Query
+          raise ArgumentError, "Expected Query object, you used #{query.class}" unless query.is_a?(ActiveContext::Query)
+
+          es_query = Processor.transform(query) # Query AST -> Elasticsearch request body
+          res = client.search(index: collection, body: es_query) # collection is sent as the index name
           QueryResult.new(res)
         end
 
diff --git a/gems/gitlab-active-context/lib/active_context/databases/elasticsearch/processor.rb b/gems/gitlab-active-context/lib/active_context/databases/elasticsearch/processor.rb
new file mode 100644
index 0000000000000..dd2d24b2b0b6a
--- /dev/null
+++ b/gems/gitlab-active-context/lib/active_context/databases/elasticsearch/processor.rb
@@ -0,0 +1,298 @@
+# frozen_string_literal: true
+
+module ActiveContext
+  module Databases
+    module Elasticsearch
+      # Transforms ActiveContext::Query objects into Elasticsearch query DSL format.
+      #
+      # This processor handles the conversion of various query types into their corresponding
+      # Elasticsearch query structures, including:
+      # - Term queries for exact matches (single values)
+      # - Terms queries for multiple value matches (array values)
+      # - Prefix queries for starts-with matches
+      # - Bool queries for AND/OR combinations
+      # - KNN queries for vector similarity search
+      #
+      # KNN queries are handled specially to ensure they work with Elasticsearch's requirements:
+      # - Basic KNN queries are placed at the root level under the 'knn' key
+      # - When combining KNN with filters, the filters are included inside the KNN query under 'filter'
+      # - OR conditions with KNN maintain the KNN at root level with other conditions under 'query'
+      #
+      # @example Basic filter query with term
+      #   query = ActiveContext::Query.filter(status: 'active')
+      #   processor = Processor.new
+      #   processor.process(query)
+      #   # => { query: { bool: { must: [{ term: { status: 'active' } }] } } }
+      #
+      # @example Filter query with terms
+      #   query = ActiveContext::Query.filter(status: ['active', 'pending'])
+      #   processor = Processor.new
+      #   processor.process(query)
+      #   # => { query: { bool: { must: [{ terms: { status: ['active', 'pending'] } }] } } }
+      #
+      # @example KNN with filter
+      #   query = ActiveContext::Query.filter(status: 'active').knn(
+      #     target: 'embedding',
+      #     vector: [0.1, 0.2],
+      #     limit: 5
+      #   )
+      #   processor = Processor.new
+      #   processor.process(query)
+      #   # => {
+      #   #      knn: {
+      #   #        field: 'embedding',
+      #   #        query_vector: [0.1, 0.2],
+      #   #        k: 5,
+      #   #        num_candidates: 50,
+      #   #        filter: { bool: { must: [{ term: { status: 'active' } }] } }
+      #   #      }
+      #   #    }
+      class Processor
+        include ActiveContext::Databases::Concerns::Processor
+
+        # Builds a fresh processor and delegates to #process.
+        #
+        # @param node [ActiveContext::Query] The root query node to transform
+        # @return [Hash] The Elasticsearch request body (:query and/or :knn, optionally :size)
+        # @example
+        #   Processor.transform(ActiveContext::Query.filter(status: 'active'))
+        def self.transform(node)
+          new.process(node)
+        end
+
+        # Dispatches a query node to the handler matching its type.
+        #
+        # @param node [ActiveContext::Query] The query node to process
+        # @return [Hash] The Elasticsearch query DSL
+        # @raise [ArgumentError] If the query type is not supported
+        def process(node)
+          kind = node.type
+
+          return process_filter(node.value) if kind == :filter
+          return process_prefix(node.value) if kind == :prefix
+          return process_or(node) if kind == :or
+          return process_and(node.children) if kind == :and
+          return process_knn(node) if kind == :knn
+          return process_limit(node) if kind == :limit
+
+          raise ArgumentError, "Unsupported node type: #{kind}"
+        end
+
+        private
+
+        # Builds one must clause per condition: `terms` for Array values, `term` for anything else.
+        # @param conditions [Hash] Field => value pairs; an Array value matches any of its elements
+        # @return [Hash] A bool query with term/terms clauses in the must array
+        # @example Single value (term)
+        #   process_filter(status: 'active')
+        #   # => { query: { bool: { must: [{ term: { status: 'active' } }] } } }
+        # @example Array value (terms)
+        #   process_filter(status: ['active', 'pending'])
+        #   # => { query: { bool: { must: [{ terms: { status: ['active', 'pending'] } }] } } }
+        def process_filter(conditions)
+          build_bool_query(:must) do |clauses|
+            conditions.each do |field, value|
+              clause_type = value.is_a?(Array) ? :terms : :term
+              clauses << { clause_type => { field => value } }
+            end
+          end
+        end
+
+        # Builds one `prefix` must clause per condition.
+        #
+        # All conditions must hold at once, since the clauses are collected under `must`.
+        #
+        # @param conditions [Hash] Field => prefix pairs (starts-with matches)
+        # @return [Hash] A bool query with prefix clauses in the must array
+        # @example
+        #   process_prefix(name: 'test', path: 'foo/')
+        #   # => { query: { bool: { must: [
+        #   #      { prefix: { name: 'test' } },
+        #   #      { prefix: { path: 'foo/' } }
+        #   #    ] } } }
+        def process_prefix(conditions)
+          build_bool_query(:must) do |clauses|
+            conditions.each { |field, prefix| clauses << { prefix: { field => prefix } } }
+          end
+        end
+
+        # Routes an OR node to the KNN-aware or the plain handler.
+        #
+        # Only direct children are inspected when deciding which handler applies.
+        #
+        # @param node [ActiveContext::Query] The OR query node
+        # @return [Hash] Either:
+        #   - A bool query with should clauses for simple OR conditions
+        #   - A combined structure with KNN at root level and other conditions under 'query' for OR with KNN
+        # @see #process_simple_or
+        # @see #process_or_with_knn
+        def process_or(node)
+          return process_or_with_knn(node) if contains_knn?(node)
+
+          process_simple_or(node.children)
+        end
+
+        # Processes simple OR conditions (without KNN)
+        #
+        # Each child's :query part becomes one should clause; at least one of them has to match.
+        #
+        # @param children [Array<ActiveContext::Query>] The child queries to OR together
+        # @return [Hash] A bool query with should clauses and minimum_should_match: 1
+        # @example
+        #   process_simple_or([filter_query, prefix_query])
+        #   # => { query: { bool: {
+        #   #      should: [...],
+        #   #      minimum_should_match: 1
+        #   #    } } }
+        def process_simple_or(children)
+          should_clauses = children.map { |child| extract_query(process(child)) }
+
+          build_bool_query(:should, minimum_should_match: 1) { |clauses| clauses.concat(should_clauses) }
+        end
+
+        # Processes OR conditions that include a KNN query (only the first KNN child is hoisted).
+        #
+        # @param node [ActiveContext::Query] The OR query node containing KNN
+        # @return [Hash] KNN (with its own filter, if any) at root level; other conditions under 'query'
+        # @example KNN OR filter
+        #   # => {
+        #   #      knn: { field: 'embedding', ... },
+        #   #      query: { bool: { should: [...], minimum_should_match: 1 } }
+        #   #    }
+        def process_or_with_knn(node)
+          knn_child = find_knn_child(node)
+          other_conditions = build_or_conditions(node, knn_child)
+          # Use #process_knn (not bare params) so a filter chained onto the KNN child is not dropped
+          knn_params = process_knn(knn_child)[:knn]
+
+          other_conditions.empty? ? { knn: knn_params } : { knn: knn_params, query: extract_query(other_conditions) }
+        end
+
+        # Processes AND conditions
+        #
+        # Every child is processed separately and its :query part is added as one must clause,
+        # so nested bool queries are kept as-is rather than flattened.
+        # @param children [Array<ActiveContext::Query>] The child queries to AND together
+        # @return [Hash] A bool query with must clauses
+        # @example
+        #   process_and([filter_query, prefix_query])
+        #   # => { query: { bool: { must: [...] } } }
+        def process_and(children)
+          must_clauses = children.map { |child| extract_query(process(child)) }
+
+          build_bool_query(:must) { |clauses| clauses.concat(must_clauses) }
+        end
+
+        # Processes KNN query, nesting the :query part of its first child (if any) as the KNN filter
+        #
+        # Any children after the first are not consulted.
+        #
+        # @param node [ActiveContext::Query] The KNN query node
+        # @return [Hash] KNN parameters at root level, with filter conditions nested inside KNN if present
+        # @example Basic KNN
+        #   # => { knn: { field: 'embedding', ... } }
+        # @example KNN with filter
+        #   # => {
+        #   #      knn: {
+        #   #        field: 'embedding',
+        #   #        ...,
+        #   #        filter: { bool: { must: [...] } }
+        #   #      }
+        #   #    }
+        def process_knn(node)
+          knn_params = extract_knn_params(node)
+          knn_params[:filter] = extract_query(process(node.children.first)) if node.children.any?
+
+          { knn: knn_params }
+        end
+
+        # Processes limit by merging a :size entry into the processed first child
+        #
+        # Hash#merge returns a copy, so the child's own result is left untouched.
+        #
+        # @param node [ActiveContext::Query] The limit query node; its value becomes the size
+        # @return [Hash] The child's query with size parameter added
+        # @example With size 10
+        #   # => { query: {...}, size: 10 }
+        def process_limit(node)
+          process(node.children.first).merge(size: node.value)
+        end
+
+        # Tells whether a KNN query sits directly under the given node
+        #
+        # @param node [ActiveContext::Query] The query node to check
+        # @return [Boolean] true if any direct child has type :knn
+        def contains_knn?(node)
+          node.children.map(&:type).include?(:knn)
+        end
+
+        # Returns the first direct child of type :knn
+        #
+        # @param node [ActiveContext::Query] The query node to search
+        # @return [ActiveContext::Query, nil] The first KNN child, or nil when there is none
+        def find_knn_child(node)
+          node.children.find { |candidate| candidate.type == :knn }
+        end
+
+        # Builds OR conditions excluding KNN query
+        #
+        # @param node [ActiveContext::Query] The query node to process
+        # @param knn_child [ActiveContext::Query] The KNN child to exclude (compared with ==)
+        # @return [Hash] A should-based bool query over the remaining children,
+        #   or an empty Hash when the KNN child was the only one
+        def build_or_conditions(node, knn_child)
+          remaining = node.children.reject { |child| child == knn_child }
+          return {} if remaining.empty?
+
+          should_clauses = remaining.map { |child| extract_query(process(child)) }
+          build_bool_query(:should, minimum_should_match: 1) { |clauses| clauses.concat(should_clauses) }
+        end
+
+        # Helper to build bool queries consistently
+        # @param type [:must, :should] The bool occurrence key that receives the clauses
+        # @param minimum_should_match [Integer, nil] Added to the bool query only when truthy
+        # @yield [Array] The (initially empty) clause list stored under `type`
+        # @return [Hash] The bool query wrapped as `{ query: { bool: ... } }`
+        def build_bool_query(type, minimum_should_match: nil)
+          clauses = []
+          bool = { type => clauses }
+          bool[:minimum_should_match] = minimum_should_match if minimum_should_match
+
+          yield clauses
+
+          { query: { bool: bool } }
+        end
+
+        # Extracts the :query part, failing fast (instead of returning nil) for results without one, e.g. bare KNN
+        # @param processed [Hash] The processed query result
+        # @return [Hash] The query part
+        # @raise [ArgumentError] If the result has no :query key
+        def extract_query(processed)
+          processed.fetch(:query) { raise ArgumentError, "Expected a :query part but got keys #{processed.keys.inspect}" }
+        end
+
+        # Extracts KNN parameters from a node into the expected format
+        #
+        # @param node [ActiveContext::Query] The KNN query node (value holds :target, :vector, :limit)
+        # @return [Hash] The formatted KNN parameters; num_candidates is 10x k, capped at 10,000
+        # @example
+        #   # => {
+        #   #      field: 'embedding', query_vector: [0.1, 0.2],
+        #   #      k: 5,
+        #   #      num_candidates: 50
+        #   #    }
+        def extract_knn_params(node)
+          knn_params = node.value
+          k = knn_params[:limit]
+          {
+            field: knn_params[:target],
+            query_vector: knn_params[:vector],
+            k: k,
+            # Elasticsearch rejects num_candidates above 10,000, so cap the 10x oversampling there
+            num_candidates: [k * 10, 10_000].min
+          }
+        end
+      end
+    end
+  end
+end
diff --git a/gems/gitlab-active-context/lib/active_context/query/processor_example.rb b/gems/gitlab-active-context/lib/active_context/query/processor_example.rb
index 0a39dcb7c59e6..c0b89561f5288 100644
--- a/gems/gitlab-active-context/lib/active_context/query/processor_example.rb
+++ b/gems/gitlab-active-context/lib/active_context/query/processor_example.rb
@@ -12,7 +12,9 @@ class Query
     # - Unquoted identifiers (column names)
     # - Direct interpolation of arrays and limits
     class ProcessorExample
-      def self.to_sql(node)
+      include ActiveContext::Databases::Concerns::Processor
+
+      def self.transform(node)
         new.process(node)
       end
 
diff --git a/gems/gitlab-active-context/spec/lib/active_context/databases/elasticsearch/client_spec.rb b/gems/gitlab-active-context/spec/lib/active_context/databases/elasticsearch/client_spec.rb
index cb7d8354a84b6..bf313814ab95c 100644
--- a/gems/gitlab-active-context/spec/lib/active_context/databases/elasticsearch/client_spec.rb
+++ b/gems/gitlab-active-context/spec/lib/active_context/databases/elasticsearch/client_spec.rb
@@ -8,6 +8,7 @@
   describe '#search' do
     let(:elasticsearch_client) { instance_double(Elasticsearch::Client) }
     let(:search_response) { { 'hits' => { 'total' => 5, 'hits' => [] } } }
+    let(:query) { ActiveContext::Query.filter(project_id: 1) }
 
     before do
       allow(client).to receive(:client).and_return(elasticsearch_client)
@@ -16,11 +17,11 @@
 
     it 'calls search on the Elasticsearch client' do
       expect(elasticsearch_client).to receive(:search)
-      client.search('query')
+      client.search(collection: 'test', query: query)
     end
 
     it 'returns a QueryResult object' do
-      result = client.search('query')
+      result = client.search(collection: 'test', query: query)
       expect(result).to be_a(ActiveContext::Databases::Elasticsearch::QueryResult)
     end
   end
diff --git a/gems/gitlab-active-context/spec/lib/active_context/databases/elasticsearch/processor_spec.rb b/gems/gitlab-active-context/spec/lib/active_context/databases/elasticsearch/processor_spec.rb
new file mode 100644
index 0000000000000..796b2f512dd63
--- /dev/null
+++ b/gems/gitlab-active-context/spec/lib/active_context/databases/elasticsearch/processor_spec.rb
@@ -0,0 +1,383 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe ActiveContext::Databases::Elasticsearch::Processor do
+  it_behaves_like 'a query processor'
+
+  describe '#process' do
+    subject(:processor) { described_class.new }
+
+    let(:simple_filter) { ActiveContext::Query.filter(status: 'active') }
+    let(:simple_prefix) { ActiveContext::Query.prefix(name: 'test') }
+    let(:simple_knn) do
+      ActiveContext::Query.knn(
+        target: 'embedding',
+        vector: [0.1, 0.2],
+        limit: 5
+      )
+    end
+
+    context 'with filter queries' do
+      it 'creates a term query for exact matches' do
+        query = ActiveContext::Query.filter(status: 'active', project_id: 123)
+        result = processor.process(query)
+
+        expect(result).to eq(
+          query: {
+            bool: {
+              must: [
+                { term: { status: 'active' } },
+                { term: { project_id: 123 } }
+              ]
+            }
+          }
+        )
+      end
+
+      it 'creates a terms query for array values' do
+        query = ActiveContext::Query.filter(project_id: [1, 2, 3])
+        result = processor.process(query)
+
+        expect(result).to eq(
+          query: {
+            bool: {
+              must: [
+                { terms: { project_id: [1, 2, 3] } }
+              ]
+            }
+          }
+        )
+      end
+
+      it 'handles mixed term and terms queries' do
+        query = ActiveContext::Query.filter(
+          status: 'active',
+          project_id: [1, 2, 3],
+          category: 'product'
+        )
+        result = processor.process(query)
+
+        expect(result).to eq(
+          query: {
+            bool: {
+              must: [
+                { term: { status: 'active' } },
+                { terms: { project_id: [1, 2, 3] } },
+                { term: { category: 'product' } }
+              ]
+            }
+          }
+        )
+      end
+
+      it 'combines multiple filter queries with array values in must clauses' do
+        filter1 = ActiveContext::Query.filter(status: %w[active pending])
+        filter2 = ActiveContext::Query.filter(category: 'product')
+        query = ActiveContext::Query.and(filter1, filter2)
+
+        result = processor.process(query)
+
+        expect(result).to eq(
+          query: {
+            bool: {
+              must: [
+                { bool: { must: [{ terms: { status: %w[active pending] } }] } },
+                { bool: { must: [{ term: { category: 'product' } }] } }
+              ]
+            }
+          }
+        )
+      end
+
+      it 'combines multiple filter queries in must clauses' do
+        filter1 = ActiveContext::Query.filter(status: 'active')
+        filter2 = ActiveContext::Query.filter(category: 'product')
+        query = ActiveContext::Query.and(filter1, filter2)
+
+        result = processor.process(query)
+
+        expect(result).to eq(
+          query: {
+            bool: {
+              must: [
+                { bool: { must: [{ term: { status: 'active' } }] } },
+                { bool: { must: [{ term: { category: 'product' } }] } }
+              ]
+            }
+          }
+        )
+      end
+    end
+
+    context 'with prefix queries' do
+      it 'creates a prefix query for starts-with matches' do
+        query = ActiveContext::Query.prefix(name: 'test', path: 'foo/')
+        result = processor.process(query)
+
+        expect(result).to eq(
+          query: {
+            bool: {
+              must: [
+                { prefix: { name: 'test' } },
+                { prefix: { path: 'foo/' } }
+              ]
+            }
+          }
+        )
+      end
+    end
+
+    context 'with OR queries' do
+      it 'creates a should query with minimum_should_match' do
+        query = ActiveContext::Query.or(simple_filter, simple_prefix)
+        result = processor.process(query)
+
+        expect(result).to eq(
+          query: {
+            bool: {
+              should: [
+                { bool: { must: [{ term: { status: 'active' } }] } },
+                { bool: { must: [{ prefix: { name: 'test' } }] } }
+              ],
+              minimum_should_match: 1
+            }
+          }
+        )
+      end
+
+      it 'handles terms queries in OR conditions' do
+        filter1 = ActiveContext::Query.filter(project_id: [1, 2, 3])
+        filter2 = ActiveContext::Query.filter(status: 'active')
+        query = ActiveContext::Query.or(filter1, filter2)
+
+        result = processor.process(query)
+
+        expect(result).to eq(
+          query: {
+            bool: {
+              should: [
+                { bool: { must: [{ terms: { project_id: [1, 2, 3] } }] } },
+                { bool: { must: [{ term: { status: 'active' } }] } }
+              ],
+              minimum_should_match: 1
+            }
+          }
+        )
+      end
+
+      context 'when containing KNN' do
+        it 'combines KNN with other conditions' do
+          query = ActiveContext::Query.or(simple_knn, simple_filter)
+          result = processor.process(query)
+
+          expect(result).to eq(
+            knn: {
+              field: 'embedding',
+              query_vector: [0.1, 0.2],
+              k: 5,
+              num_candidates: 50
+            },
+            query: {
+              bool: {
+                should: [
+                  { bool: { must: [{ term: { status: 'active' } }] } }
+                ],
+                minimum_should_match: 1
+              }
+            }
+          )
+        end
+
+        it 'returns only KNN query when no other conditions' do
+          query = ActiveContext::Query.or(simple_knn)
+          result = processor.process(query)
+
+          expect(result).to eq(
+            knn: {
+              field: 'embedding',
+              query_vector: [0.1, 0.2],
+              k: 5,
+              num_candidates: 50
+            }
+          )
+        end
+      end
+    end
+
+    context 'with AND queries' do
+      it 'creates a must query combining conditions' do
+        query = ActiveContext::Query.and(simple_filter, simple_prefix)
+        result = processor.process(query)
+
+        expect(result).to eq(
+          query: {
+            bool: {
+              must: [
+                { bool: { must: [{ term: { status: 'active' } }] } },
+                { bool: { must: [{ prefix: { name: 'test' } }] } }
+              ]
+            }
+          }
+        )
+      end
+    end
+
+    context 'with KNN queries' do
+      it 'creates a basic KNN query' do
+        result = processor.process(simple_knn)
+
+        expect(result).to eq(
+          knn: {
+            field: 'embedding',
+            query_vector: [0.1, 0.2],
+            k: 5,
+            num_candidates: 50
+          }
+        )
+      end
+
+      it 'applies filters inside the KNN query' do
+        query = simple_filter.knn(
+          target: 'embedding',
+          vector: [0.1, 0.2],
+          limit: 5
+        )
+
+        result = processor.process(query)
+
+        expect(result).to eq(
+          knn: {
+            field: 'embedding',
+            query_vector: [0.1, 0.2],
+            k: 5,
+            num_candidates: 50,
+            filter: {
+              bool: {
+                must: [{ term: { status: 'active' } }]
+              }
+            }
+          }
+        )
+      end
+
+      it 'handles terms filter inside KNN query' do
+        filter = ActiveContext::Query.filter(project_id: [1, 2, 3])
+        query = filter.knn(
+          target: 'embedding',
+          vector: [0.1, 0.2],
+          limit: 5
+        )
+
+        result = processor.process(query)
+
+        expect(result).to eq(
+          knn: {
+            field: 'embedding',
+            query_vector: [0.1, 0.2],
+            k: 5,
+            num_candidates: 50,
+            filter: {
+              bool: {
+                must: [{ terms: { project_id: [1, 2, 3] } }]
+              }
+            }
+          }
+        )
+      end
+
+      it 'handles multiple filter conditions inside KNN query' do
+        filter1 = ActiveContext::Query.filter(status: 'active')
+        filter2 = ActiveContext::Query.filter(category: 'product')
+        base_query = ActiveContext::Query.and(filter1, filter2)
+
+        query = base_query.knn(
+          target: 'embedding',
+          vector: [0.1, 0.2],
+          limit: 5
+        )
+
+        result = processor.process(query)
+
+        expect(result).to eq(
+          knn: {
+            field: 'embedding',
+            query_vector: [0.1, 0.2],
+            k: 5,
+            num_candidates: 50,
+            filter: {
+              bool: {
+                must: [
+                  { bool: { must: [{ term: { status: 'active' } }] } },
+                  { bool: { must: [{ term: { category: 'product' } }] } }
+                ]
+              }
+            }
+          }
+        )
+      end
+
+      it 'properly handles KNN with both prefix and filter conditions inside KNN query' do
+        filter = ActiveContext::Query.filter(status: 'active')
+        prefix = ActiveContext::Query.prefix(name: 'test')
+        base_query = ActiveContext::Query.and(filter, prefix)
+
+        query = base_query.knn(
+          target: 'embedding',
+          vector: [0.1, 0.2],
+          limit: 5
+        )
+
+        result = processor.process(query)
+
+        expect(result).to eq(
+          knn: {
+            field: 'embedding',
+            query_vector: [0.1, 0.2],
+            k: 5,
+            num_candidates: 50,
+            filter: {
+              bool: {
+                must: [
+                  { bool: { must: [{ term: { status: 'active' } }] } },
+                  { bool: { must: [{ prefix: { name: 'test' } }] } }
+                ]
+              }
+            }
+          }
+        )
+      end
+    end
+
+    context 'with limit queries' do
+      it 'adds size parameter to the query' do
+        query = simple_filter.limit(10)
+        result = processor.process(query)
+
+        expect(result).to eq(
+          query: {
+            bool: {
+              must: [{ term: { status: 'active' } }]
+            }
+          },
+          size: 10
+        )
+      end
+
+      it 'adds size parameter to KNN query' do
+        query = simple_knn.limit(10)
+        result = processor.process(query)
+
+        expect(result).to eq(
+          knn: {
+            field: 'embedding',
+            query_vector: [0.1, 0.2],
+            k: 5,
+            num_candidates: 50
+          },
+          size: 10
+        )
+      end
+    end
+  end
+end
diff --git a/gems/gitlab-active-context/spec/spec_helper.rb b/gems/gitlab-active-context/spec/spec_helper.rb
index 97e31cc879efc..eada08bc778b5 100644
--- a/gems/gitlab-active-context/spec/spec_helper.rb
+++ b/gems/gitlab-active-context/spec/spec_helper.rb
@@ -8,6 +8,8 @@
 require 'active_support/concern'
 require 'redis'
 
+Dir[File.join(__dir__, 'support/**/*.rb')].each { |f| require f }
+
 RSpec.configure do |config|
   # Enable flags like --only-failures and --next-failure
   config.example_status_persistence_file_path = ".rspec_status"
diff --git a/gems/gitlab-active-context/spec/support/shared_examples/query_processor_examples.rb b/gems/gitlab-active-context/spec/support/shared_examples/query_processor_examples.rb
new file mode 100644
index 0000000000000..6fbf8d24417ae
--- /dev/null
+++ b/gems/gitlab-active-context/spec/support/shared_examples/query_processor_examples.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+RSpec.shared_examples 'a query processor' do
+  describe '.transform' do
+    it 'delegates to a new instance and returns its result' do
+      query = ActiveContext::Query.filter(foo: :bar)
+      processor = instance_double(described_class)
+
+      expect(described_class).to receive(:new).and_return(processor)
+      expect(processor).to receive(:process).with(query).and_return(:es_query) # sentinel result
+
+      expect(described_class.transform(query)).to eq(:es_query) # result must be passed through
+    end
+  end
+
+  describe '#process' do
+    subject(:processor) { described_class.new }
+
+    it 'exposes #process as a public instance method' do
+      expect(processor).to respond_to(:process)
+    end
+  end
+
+  describe 'error handling' do
+    subject(:processor) { described_class.new }
+
+    it 'raises ArgumentError for unsupported node types' do
+      query = instance_double(ActiveContext::Query, type: :invalid)
+      expect { processor.process(query) }.to raise_error(ArgumentError, /unsupported.*type/i)
+    end
+  end
+end
-- 
GitLab