From e7d458efe21ec68978618e82ae85d20dbaa376b7 Mon Sep 17 00:00:00 2001
From: Qingyu Zhao <qzhao@gitlab.com>
Date: Mon, 16 Dec 2019 22:26:14 +1100
Subject: [PATCH] LRU object caching for GroupProjectObjectBuilder

It reduces database queries and object creation for labels/milestones
during project import.
  - reduces import time when project heavily uses labels/milestones
  - reduces database pressure
---
 Gemfile                                       |  3 ++
 Gemfile.lock                                  |  2 +
 ...t-caching-group-project-object-builder.yml |  5 ++
 .../group_project_object_builder.rb           | 28 +++++++++-
 .../group_project_object_builder_spec.rb      | 53 +++++++++++++++++++
 5 files changed, 89 insertions(+), 2 deletions(-)
 create mode 100644 changelogs/unreleased/lru-object-caching-group-project-object-builder.yml

diff --git a/Gemfile b/Gemfile
index d8383d138da95..951ae73a318d2 100644
--- a/Gemfile
+++ b/Gemfile
@@ -484,3 +484,6 @@ gem 'countries', '~> 3.0'
 gem 'retriable', '~> 3.1.2'
 
 gem 'liquid', '~> 4.0'
+
+# LRU cache
+gem 'lru_redux'
diff --git a/Gemfile.lock b/Gemfile.lock
index 2be06106f3aa7..0bf630b42eff8 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -598,6 +598,7 @@ GEM
     loofah (2.4.0)
       crass (~> 1.0.2)
       nokogiri (>= 1.5.9)
+    lru_redux (1.1.0)
     lumberjack (1.0.13)
     mail (2.7.1)
       mini_mime (>= 0.1.1)
@@ -1263,6 +1264,7 @@ DEPENDENCIES
   liquid (~> 4.0)
   lograge (~> 0.5)
   loofah (~> 2.2)
+  lru_redux
   mail_room (~> 0.10.0)
   marginalia (~> 1.8.0)
   memory_profiler (~> 0.9)
diff --git a/changelogs/unreleased/lru-object-caching-group-project-object-builder.yml b/changelogs/unreleased/lru-object-caching-group-project-object-builder.yml
new file mode 100644
index 0000000000000..bc3f6379de64e
--- /dev/null
+++ b/changelogs/unreleased/lru-object-caching-group-project-object-builder.yml
@@ -0,0 +1,5 @@
+---
+title: LRU object caching for GroupProjectObjectBuilder
+merge_request: 21823
+author:
+type: performance
diff --git a/lib/gitlab/import_export/group_project_object_builder.rb b/lib/gitlab/import_export/group_project_object_builder.rb
index 2e7ab3d4b69c2..78ba489445921 100644
--- a/lib/gitlab/import_export/group_project_object_builder.rb
+++ b/lib/gitlab/import_export/group_project_object_builder.rb
@@ -12,6 +12,13 @@ module ImportExport
     #
     # It also adds some logic around Group Labels/Milestones for edge cases.
     class GroupProjectObjectBuilder
+      # Cache keeps 1000 entries at most, 1000 is chosen based on:
+      #    - one cache entry uses around 0.5K memory, 1000 items uses around 500K.
+      #      (leave some buffer it should be less than 1M). It is afforable cost for project import.
+      #    - for projects in Gitlab.com, it seems 1000 entries for labels/milestones is enough.
+      #      For example, gitlab has ~970 labels and 26 milestones.
+      LRU_CACHE_SIZE = 1000
+
       def self.build(*args)
         Project.transaction do
           new(*args).find
@@ -23,17 +30,34 @@ def initialize(klass, attributes)
         @attributes = attributes
         @group = @attributes['group']
         @project = @attributes['project']
+
+        if Gitlab::SafeRequestStore.active?
+          @lru_cache = cache_from_request_store
+          @cache_key = [klass, attributes]
+        end
       end
 
       def find
         return if epic? && group.nil?
 
-        find_object || klass.create(project_attributes)
+        find_with_cache do
+          find_object || klass.create(project_attributes)
+        end
       end
 
       private
 
-      attr_reader :klass, :attributes, :group, :project
+      attr_reader :klass, :attributes, :group, :project, :lru_cache, :cache_key
+
+      def find_with_cache
+        return yield unless lru_cache && cache_key
+
+        lru_cache[cache_key] ||= yield
+      end
+
+      def cache_from_request_store
+        Gitlab::SafeRequestStore[:lru_cache] ||= LruRedux::Cache.new(LRU_CACHE_SIZE)
+      end
 
       def find_object
         klass.where(where_clause).first
diff --git a/spec/lib/gitlab/import_export/group_project_object_builder_spec.rb b/spec/lib/gitlab/import_export/group_project_object_builder_spec.rb
index 0d0a2df4423f8..355757654daa3 100644
--- a/spec/lib/gitlab/import_export/group_project_object_builder_spec.rb
+++ b/spec/lib/gitlab/import_export/group_project_object_builder_spec.rb
@@ -12,6 +12,59 @@
            group: create(:group))
   end
 
+  let(:lru_cache) { subject.send(:lru_cache) }
+  let(:cache_key) { subject.send(:cache_key) }
+
+  context 'request store is not active' do
+    subject do
+      described_class.new(Label,
+                          'title' => 'group label',
+                          'project' => project,
+                          'group' => project.group)
+    end
+
+    it 'ignore cache initialize' do
+      expect(lru_cache).to be_nil
+      expect(cache_key).to be_nil
+    end
+  end
+
+  context 'request store is active', :request_store do
+    subject do
+      described_class.new(Label,
+                          'title' => 'group label',
+                          'project' => project,
+                          'group' => project.group)
+    end
+
+    it 'initialize cache in memory' do
+      expect(lru_cache).not_to be_nil
+      expect(cache_key).not_to be_nil
+    end
+
+    it 'cache object when first time find the object' do
+      group_label = create(:group_label, name: 'group label', group: project.group)
+
+      expect(subject).to receive(:find_object).and_call_original
+      expect { subject.find }
+        .to change { lru_cache[cache_key] }
+        .from(nil).to(group_label)
+
+      expect(subject.find).to eq(group_label)
+    end
+
+    it 'read from cache when object has been cached' do
+      group_label = create(:group_label, name: 'group label', group: project.group)
+
+      subject.find
+
+      expect(subject).not_to receive(:find_object)
+      expect { subject.find }.not_to change { lru_cache[cache_key] }
+
+      expect(subject.find).to eq(group_label)
+    end
+  end
+
   context 'labels' do
     it 'finds the existing group label' do
       group_label = create(:group_label, name: 'group label', group: project.group)
-- 
GitLab