From f4fa78e183dea282f7f6ff5b2d14842f093d7ca0 Mon Sep 17 00:00:00 2001
From: Dmitry Gruzd <dgruzd@gitlab.com>
Date: Tue, 5 Dec 2023 11:53:50 +0000
Subject: [PATCH] Cache Zoekt responses

---
 .../ops/zoekt_cache_search_responses.yml      |   8 +
 ee/lib/gitlab/zoekt/search_results.rb         |  99 +++++++++----
 ee/lib/search/zoekt/cache.rb                  |  95 ++++++++++++
 .../lib/gitlab/zoekt/search_results_spec.rb   |  16 ++
 ee/spec/lib/search/zoekt/cache_spec.rb        | 139 ++++++++++++++++++
 5 files changed, 327 insertions(+), 30 deletions(-)
 create mode 100644 ee/config/feature_flags/ops/zoekt_cache_search_responses.yml
 create mode 100644 ee/lib/search/zoekt/cache.rb
 create mode 100644 ee/spec/lib/search/zoekt/cache_spec.rb

diff --git a/ee/config/feature_flags/ops/zoekt_cache_search_responses.yml b/ee/config/feature_flags/ops/zoekt_cache_search_responses.yml
new file mode 100644
index 0000000000000..b67bacef64629
--- /dev/null
+++ b/ee/config/feature_flags/ops/zoekt_cache_search_responses.yml
@@ -0,0 +1,8 @@
+---
+name: zoekt_cache_search_responses
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/137337
+rollout_issue_url:
+milestone: '16.7'
+type: ops
+group: group::global search
+default_enabled: false
diff --git a/ee/lib/gitlab/zoekt/search_results.rb b/ee/lib/gitlab/zoekt/search_results.rb
index 91b64b1613887..faa7abca96b48 100644
--- a/ee/lib/gitlab/zoekt/search_results.rb
+++ b/ee/lib/gitlab/zoekt/search_results.rb
@@ -25,11 +25,11 @@ def initialize(current_user, query, limit_project_ids = nil, node_id:, order_by:
         @filters = filters
       end
 
-      def objects(scope, page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil)
+      def objects(_scope, page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil)
         blobs(page: page, per_page: per_page, preload_method: preload_method)
       end
 
-      def formatted_count(scope)
+      def formatted_count(_scope)
         limited_counter_with_delimiter(blobs_count)
       end
 
@@ -60,7 +60,7 @@ def parse_zoekt_search_result(result, project)
         )
       end
 
-      def aggregations(scope)
+      def aggregations(_scope)
         []
       end
 
@@ -114,64 +114,103 @@ def limited_counter_with_delimiter(count)
         end
       end
 
-      def search_as_found_blob(query, repositories, page:, per_page:, options:, preload_method:)
+      def search_as_found_blob(query, _repositories, page:, per_page:, options:, preload_method:)
         zoekt_search_and_wrap(query,
           page: page,
-          per_page: per_page,
-          options: options,
-          preload_method: preload_method) do |result, project|
+                              per_page: per_page,
+                              options: options,
+                              preload_method: preload_method) do |result, project|
           parse_zoekt_search_result(result, project)
         end
       end
 
-      def zoekt_extract_results(search_result, per_page:, offset:)
-        results = []
+      # Performs zoekt search and parses the response
+      #
+      # @param query [String] search query
+      # @param per_page [Integer] how many documents per page
+      # @param options [Hash] additional options
+      # @param page_limit [Integer] maximum number of pages we parse
+      # @return [Array<Hash, Integer>] the results and total count
+      def zoekt_search(query, per_page:, options:, page_limit:)
+        response = ::Gitlab::Search::Zoekt::Client.search(
+          query,
+          num: ZOEKT_COUNT_LIMIT,
+          project_ids: options[:project_ids],
+          node_id: options[:node_id]
+        )
+
+        if response[:Error]
+          @blobs_count = 0
+          @error = response[:Error]
+          return [{}, 0]
+        end
+
+        total_count = response[:Result][:MatchCount].clamp(0, ZOEKT_COUNT_LIMIT)
+        results = zoekt_extract_result_pages(response, per_page: per_page, page_limit: page_limit)
+        [results, total_count]
+      end
+
+      # Extracts results from the Zoekt response
+      #
+      # @param response [Hash] JSON response converted to hash
+      # @param per_page [Integer] how many documents per page
+      # @param page_limit [Integer] maximum number of pages we parse
+      # @return [Hash<Integer, Array<Hash>>] results hash with pages as keys (zero-based)
+      def zoekt_extract_result_pages(response, per_page:, page_limit:)
+        results = {}
         i = 0
-        (search_result[:Result][:Files] || []).each do |r|
-          project_id = r[:Repository].to_i
-          r[:LineMatches].each do |match|
-            i += 1
+        files = response.dig(:Result, :Files) || []
+        files.each do |file|
+          project_id = file[:Repository].to_i
 
-            next if i <= offset
-            return results if i > offset + per_page
+          file[:LineMatches].each do |match|
+            current_page = i / per_page
+            return results if current_page == page_limit
 
-            results << {
+            results[current_page] ||= []
+            results[current_page] << {
               project_id: project_id,
               content: [match[:Before], match[:Line], match[:After]].compact.map { |l| Base64.decode64(l) }.join("\n"),
               line: match[:LineNumber],
-              path: r[:FileName]
+              path: file[:FileName]
            }
+
+            i += 1
          end
        end
 
        results
      end
 
-      def zoekt_search_and_wrap(query, page: 1, per_page: 20, options: {}, preload_method: nil, &blk)
-        search_result = ::Gitlab::Search::Zoekt::Client.search(
+      def zoekt_search_and_wrap(query, per_page:, page: 1, options: {}, preload_method: nil, &blk)
+        zoekt_cache = ::Search::Zoekt::Cache.new(
           query,
-          num: ZOEKT_COUNT_LIMIT,
+          current_user: current_user,
+          page: page,
+          per_page: per_page,
           project_ids: options[:project_ids],
-          node_id: options[:node_id]
+          max_per_page: DEFAULT_PER_PAGE * 2
        )
 
-        if search_result[:Error]
-          @blobs_count = 0
-          @error = search_result[:Error]
-          return Kaminari.paginate_array([])
+        search_results, total_count = zoekt_cache.fetch do |page_limit|
+          zoekt_search(query, per_page: per_page, page_limit: page_limit, options: options)
        end
 
-        total_count = search_result[:Result][:MatchCount].clamp(0, ZOEKT_COUNT_LIMIT)
-        offset = (page - 1) * per_page
-
-        results = zoekt_extract_results(search_result, per_page: per_page, offset: offset)
-        items, total_count = yield_each_zoekt_search_result(results, preload_method, total_count, &blk)
+        items, total_count = yield_each_zoekt_search_result(
+          search_results[page - 1],
+          preload_method,
+          total_count,
+          &blk
+        )
+        offset = (page - 1) * per_page
 
        Kaminari.paginate_array(items, total_count: total_count, limit: per_page, offset: offset)
      end
 
      def yield_each_zoekt_search_result(response, preload_method, total_count)
+        return [[], total_count] if total_count == 0
+
        project_ids = response.pluck(:project_id).uniq # rubocop:disable CodeReuse/ActiveRecord
        projects = Project.with_route.id_in(project_ids)
        projects = projects.public_send(preload_method) if preload_method # rubocop:disable GitlabSecurity/PublicSend
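Note on the extraction change above: zoekt_extract_result_pages buckets matches by
zero-based page index instead of slicing a flat list by offset, so a single Zoekt
round trip can yield several cacheable pages at once. A standalone sketch of that
bucketing, using hypothetical match data (not part of the patch):

    per_page = 2
    page_limit = 2
    matches = [
      { path: 'a.rb' }, { path: 'b.rb' }, # page 0
      { path: 'c.rb' }, { path: 'd.rb' }, # page 1
      { path: 'e.rb' }                    # dropped: its page == page_limit
    ]

    results = {}
    matches.each_with_index do |match, i|
      current_page = i / per_page          # integer division, zero-based page
      break if current_page == page_limit  # stop once the page budget is spent

      (results[current_page] ||= []) << match
    end

    results.keys # => [0, 1]
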
diff --git a/ee/lib/search/zoekt/cache.rb b/ee/lib/search/zoekt/cache.rb
new file mode 100644
index 0000000000000..ff614f8372ab0
--- /dev/null
+++ b/ee/lib/search/zoekt/cache.rb
@@ -0,0 +1,95 @@
+# frozen_string_literal: true
+
+module Search
+  module Zoekt
+    class Cache
+      MAX_PAGES = 10
+      EXPIRES_IN = 5.minutes
+
+      attr_reader :current_user, :query, :project_ids, :per_page, :current_page, :max_per_page
+
+      def initialize(query, current_user:, project_ids:, per_page:, page:, max_per_page:)
+        @query = query
+        @current_user = current_user
+        @project_ids = project_ids
+        @per_page = per_page
+        @current_page = page
+        @max_per_page = max_per_page
+      end
+
+      def enabled?
+        return false unless valid_arguments?
+
+        Feature.enabled?(:zoekt_cache_search_responses, current_user, type: :ops)
+      end
+
+      def fetch
+        return yield page_limit unless enabled?
+
+        search_results, total_count = read_cache
+
+        unless search_results
+          search_results, total_count = yield page_limit
+
+          update_cache!(search_results: search_results, total_count: total_count)
+        end
+
+        [search_results, total_count]
+      end
+
+      def cache_key(page: current_page)
+        user_id = current_user&.id || 0
+        # We need to use {user_id} as part of the key for Redis Cluster support
+        "cache:zoekt:{#{user_id}}/#{search_fingerprint}/#{per_page}/#{page}"
+      end
+
+      private
+
+      def valid_arguments?
+        project_ids.is_a?(Array) && project_ids.present? && per_page <= max_per_page
+      end
+
+      def with_redis(&block)
+        Gitlab::Redis::Cache.with(&block) # rubocop:disable CodeReuse/ActiveRecord -- this has nothing to do with AR
+      end
+
+      def page_limit
+        return [current_page, MAX_PAGES].max if enabled?
+
+        current_page
+      end
+
+      def search_fingerprint
+        OpenSSL::Digest.hexdigest('SHA256', "#{query}-#{project_ids.sort.join(',')}")
+      end
+
+      def read_cache
+        data = with_redis do |redis|
+          redis.get(cache_key)
+        end
+
+        return unless data
+
+        Marshal.load(data) # rubocop:disable Security/MarshalLoad -- We're loading data we saved below (similar to Rails.cache)
+      end
+
+      def update_cache!(search_results:, total_count:)
+        return unless search_results && total_count > 0
+
+        with_redis do |redis|
+          redis.multi do |pipeline|
+            (0..MAX_PAGES).each do |page_idx|
+              result = search_results[page_idx]
+              next unless result
+
+              cached_result = [{ page_idx => result }, total_count]
+
+              key = cache_key(page: page_idx + 1)
+              pipeline.set(key, Marshal.dump(cached_result), ex: EXPIRES_IN)
+            end
+          end
+        end
+      end
+    end
+  end
+end
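How the new class is meant to be driven, as a hedged sketch: `user` and
`run_zoekt_search` are hypothetical stand-ins for a User record and the real
backend call, and are not part of the patch:

    cache = ::Search::Zoekt::Cache.new(
      'def foo',
      current_user: user,      # any User record, or nil for anonymous
      project_ids: [1, 2, 3],
      per_page: 20,
      page: 1,
      max_per_page: 40
    )

    results, total_count = cache.fetch do |page_limit|
      # Runs only on a cache miss (or when the ops flag is off). With
      # caching enabled, page_limit is [page, MAX_PAGES].max, so a single
      # backend query can fill the cache for the first MAX_PAGES pages.
      run_zoekt_search(page_limit)
    end
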
diff --git a/ee/spec/lib/gitlab/zoekt/search_results_spec.rb b/ee/spec/lib/gitlab/zoekt/search_results_spec.rb
index ad5d4309970e9..2536190c6e2fb 100644
--- a/ee/spec/lib/gitlab/zoekt/search_results_spec.rb
+++ b/ee/spec/lib/gitlab/zoekt/search_results_spec.rb
@@ -29,6 +29,22 @@
     expect(results.blobs_count).to eq 5
   end
 
+  it 'instantiates zoekt cache with correct arguments' do
+    query = 'use.*egex'
+    results = described_class.new(user, query, limit_project_ids, node_id: node_id)
+
+    expect(Search::Zoekt::Cache).to receive(:new).with(
+      query,
+      current_user: user,
+      page: 1,
+      per_page: described_class::DEFAULT_PER_PAGE,
+      project_ids: limit_project_ids,
+      max_per_page: described_class::DEFAULT_PER_PAGE * 2
+    ).and_call_original
+
+    results.objects('blobs')
+  end
+
   it 'correctly handles pagination' do
     per_page = 2
 
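The #cache_key specs below pin down the key layout. For reference, a worked
example with hypothetical inputs (user id 42, query 'foo', project_ids [3, 2, 1],
per_page 20, page 1):

    fingerprint = OpenSSL::Digest.hexdigest('SHA256', 'foo-1,2,3')
    # project ids are sorted before hashing, so [3, 2, 1] and [1, 3, 2]
    # produce the same fingerprint and therefore the same cache key

    "cache:zoekt:{42}/#{fingerprint}/20/1"
    # the {42} hash tag keeps one user's keys on a single Redis Cluster slot
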
diff --git a/ee/spec/lib/search/zoekt/cache_spec.rb b/ee/spec/lib/search/zoekt/cache_spec.rb
new file mode 100644
index 0000000000000..14530d67613af
--- /dev/null
+++ b/ee/spec/lib/search/zoekt/cache_spec.rb
@@ -0,0 +1,139 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Search::Zoekt::Cache, :clean_gitlab_redis_cache, feature_category: :global_search do
+  let(:default_options) { { per_page: 20, max_per_page: 40 } }
+  let(:query) { 'foo' }
+  let_it_be(:user1) { build(:user, id: 1) }
+  let(:page) { 1 }
+  let(:project_ids) { [3, 2, 1] }
+  let(:search_results) { { 0 => { project_id: 1 }, 1 => { project_id: 2 }, 2 => { project_id: 3 } } }
+  let(:total_count) { 3 }
+  let(:response) { [search_results, total_count] }
+
+  subject(:cache) do
+    described_class.new(query, **default_options.merge(current_user: user1, project_ids: project_ids, page: page))
+  end
+
+  before do
+    stub_const("#{described_class.name}::MAX_PAGES", 2)
+  end
+
+  describe '#cache_key' do
+    let(:uniq_examples) do
+      [
+        { current_user: build(:user, id: 1), query: 'foo', project_ids: [3, 2], page: 1 },
+        { current_user: build(:user, id: 1), query: 'foo', project_ids: [3, 2], page: 1, per_page: 10 },
+        { current_user: build(:user, id: 1), query: 'bar', project_ids: [3, 2], page: 1 },
+        { current_user: build(:user, id: 1), query: 'foo', project_ids: [2], page: 1 },
+        { current_user: build(:user, id: 1), query: 'foo', project_ids: [3, 2], page: 2 },
+        { current_user: build(:user, id: 2), query: 'foo', project_ids: [3, 2], page: 1 },
+        { current_user: build(:user, id: nil), query: 'foo', project_ids: [3, 2], page: 1 }
+      ]
+    end
+
+    let(:duplicate_examples) do
+      [
+        { current_user: build(:user, id: 1), query: 'foo', project_ids: [1, 3, 2], page: 1, per_page: 20 },
+        { current_user: build(:user, id: 1), query: 'foo', project_ids: [2, 3, 1], page: 1, per_page: 20 }
+      ]
+    end
+
+    it 'returns unique cache keys for different queries' do
+      result = {}
+
+      uniq_examples.each do |e|
+        cache_key = described_class.new(e[:query], **default_options.merge(e.except(:query))).cache_key
+
+        result[cache_key] ||= []
+        result[cache_key] << e
+      end
+
+      result.each do |key, examples|
+        expect(examples.size).to eq(1), "#{key} has duplicate examples: #{examples}"
+      end
+    end
+
+    it 'returns the same cache key for duplicate queries' do
+      cache_keys = duplicate_examples.map do |e|
+        described_class.new(e[:query], **default_options.merge(e.except(:query))).cache_key
+      end
+
+      expect(cache_keys.uniq.size).to eq(1)
+    end
+  end
+
+  context 'when feature flag is disabled' do
+    before do
+      stub_feature_flags(zoekt_cache_search_responses: false)
+    end
+
+    describe '#enabled?' do
+      it 'returns false' do
+        expect(cache.enabled?).to be false
+      end
+    end
+
+    describe '#fetch' do
+      it 'does not use cache' do
+        expect(cache).not_to receive(:read_cache)
+        expect(response).to receive(:length)
+
+        data = cache.fetch do
+          response.length
+          response
+        end
+
+        expect(data).to eq(response)
+      end
+    end
+  end
+
+  context 'when feature flag is enabled' do
+    before do
+      stub_feature_flags(zoekt_cache_search_responses: true)
+    end
+
+    describe '#enabled?' do
+      it 'returns true' do
+        expect(cache.enabled?).to be true
+      end
+
+      context 'when project_ids is empty' do
+        let(:project_ids) { [] }
+
+        it 'returns false' do
+          expect(cache.enabled?).to be false
+        end
+      end
+    end
+
+    describe '#fetch' do
+      it 'reads and updates cache' do
+        expect(cache).to receive(:read_cache)
+        expect(cache).to receive(:update_cache!)
+
+        data = cache.fetch do |page_limit|
+          expect(page_limit).to eq(described_class::MAX_PAGES)
+          response
+        end
+
+        expect(data).to eq(response)
+      end
+
+      context 'when page is higher than the limit' do
+        let(:page) { 3 }
+
+        it 'sets the correct page limit' do
+          data = cache.fetch do |page_limit|
+            expect(page_limit).to eq(page)
+            response
+          end
+
+          expect(data).to eq(response)
+        end
+      end
+    end
+  end
+end
-- 
GitLab
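For completeness, the storage format round-trips like this — a sketch with Redis
replaced by a plain Hash; the fingerprint, keys, and counts are hypothetical:

    redis = {}
    total_count = 137
    search_results = { 0 => ['page one matches'], 1 => ['page two matches'] }

    # update_cache! writes one key per page (one-based in the key), each
    # holding its zero-based page slice plus the total count:
    search_results.each do |page_idx, result|
      key = "cache:zoekt:{42}/fingerprint/20/#{page_idx + 1}"
      redis[key] = Marshal.dump([{ page_idx => result }, total_count])
    end

    # A later request for page 2 hits only its own key, with no Zoekt
    # round trip, until the 5-minute EXPIRES_IN elapses:
    cached_results, cached_count = Marshal.load(redis["cache:zoekt:{42}/fingerprint/20/2"])
    cached_results[2 - 1] # => ["page two matches"]
    cached_count          # => 137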