From 62075e96412ade4e9acb64a12651c67f9ed29483 Mon Sep 17 00:00:00 2001
From: Nikola Milojevic <nmilojevic@gitlab.com>
Date: Thu, 16 Sep 2021 11:05:03 +0000
Subject: [PATCH] Add rubocop rule to require including_scheduled: true

- for idempotent workers that are utilizing load balancing
- add specs to cover all usecases
---
 .../worker_data_consistency.rb                |  65 +++++++
 ...ker_data_consistency_with_deduplication.rb | 154 ++++++++++++++++
 rubocop/cop/worker_data_consistency.rb        |  63 -------
 .../worker_data_consistency_spec.rb           |   4 +-
 ...ata_consistency_with_deduplication_spec.rb | 166 ++++++++++++++++++
 5 files changed, 387 insertions(+), 65 deletions(-)
 create mode 100644 rubocop/cop/sidekiq_load_balancing/worker_data_consistency.rb
 create mode 100644 rubocop/cop/sidekiq_load_balancing/worker_data_consistency_with_deduplication.rb
 delete mode 100644 rubocop/cop/worker_data_consistency.rb
 rename spec/rubocop/cop/{ => sidekiq_load_balancing}/worker_data_consistency_spec.rb (87%)
 create mode 100644 spec/rubocop/cop/sidekiq_load_balancing/worker_data_consistency_with_deduplication_spec.rb

diff --git a/rubocop/cop/sidekiq_load_balancing/worker_data_consistency.rb b/rubocop/cop/sidekiq_load_balancing/worker_data_consistency.rb
new file mode 100644
index 000000000000..7a2e7ee00b43
--- /dev/null
+++ b/rubocop/cop/sidekiq_load_balancing/worker_data_consistency.rb
@@ -0,0 +1,65 @@
+# frozen_string_literal: true
+
+require_relative '../../code_reuse_helpers'
+
+module RuboCop
+  module Cop
+    module SidekiqLoadBalancing
+      # This cop checks for a call to `data_consistency` to exist in Sidekiq workers.
+      #
+      # @example
+      #
+      # # bad
+      # class BadWorker
+      #   def perform
+      #   end
+      # end
+      #
+      # # good
+      # class GoodWorker
+      #   data_consistency :delayed
+      #
+      #   def perform
+      #   end
+      # end
+      #
+      class WorkerDataConsistency < RuboCop::Cop::Cop
+        include CodeReuseHelpers
+
+        HELP_LINK = 'https://docs.gitlab.com/ee/development/sidekiq_style_guide.html#job-data-consistency-strategies'
+
+        MSG = <<~MSG
+          Should define data_consistency expectation.
+
+          It is encouraged for workers to use database replicas as much as possible by declaring
+          data_consistency to use the :delayed or :sticky modes. Mode :always will result in the
+          worker always hitting the primary database node for both reads and writes, which limits
+          scalability.
+
+          Some guidelines:
+
+          1. If your worker mostly writes or reads its own writes, use mode :always. TRY TO AVOID THIS.
+          2. If your worker performs mostly reads and can tolerate small delays, use mode :delayed.
+          3. If your worker performs mostly reads but cannot tolerate any delays, use mode :sticky.
+
+          See #{HELP_LINK} for a more detailed explanation of these settings.
+        MSG
+
+        def_node_search :application_worker?, <<~PATTERN
+        `(send nil? :include (const nil? :ApplicationWorker))
+        PATTERN
+
+        def_node_search :data_consistency_defined?, <<~PATTERN
+          `(send nil? :data_consistency ...)
+        PATTERN
+
+        def on_class(node)
+          return unless in_worker?(node) && application_worker?(node)
+          return if data_consistency_defined?(node)
+
+          add_offense(node, location: :expression)
+        end
+      end
+    end
+  end
+end
diff --git a/rubocop/cop/sidekiq_load_balancing/worker_data_consistency_with_deduplication.rb b/rubocop/cop/sidekiq_load_balancing/worker_data_consistency_with_deduplication.rb
new file mode 100644
index 000000000000..e8b4b513a23a
--- /dev/null
+++ b/rubocop/cop/sidekiq_load_balancing/worker_data_consistency_with_deduplication.rb
@@ -0,0 +1,154 @@
+# frozen_string_literal: true
+
+require_relative '../../code_reuse_helpers'
+
+module RuboCop
+  module Cop
+    module SidekiqLoadBalancing
+      # This cop checks for including_scheduled: true option in idempotent Sidekiq workers that utilize load balancing capabilities.
+      #
+      # @example
+      #
+      # # bad
+      # class BadWorker
+      #   include ApplicationWorker
+      #
+      #   data_consistency :delayed
+      #   idempotent!
+      #
+      #   def perform
+      #   end
+      # end
+      #
+      # # bad
+      # class BadWorker
+      #   include ApplicationWorker
+      #
+      #   data_consistency :delayed
+      #
+      #   deduplicate :until_executing
+      #   idempotent!
+      #
+      #   def perform
+      #   end
+      # end
+      #
+      # # good
+      # class GoodWorker
+      #   include ApplicationWorker
+      #
+      #   data_consistency :delayed
+      #
+      #   deduplicate :until_executing, including_scheduled: true
+      #   idempotent!
+      #
+      #   def perform
+      #   end
+      # end
+      #
+      class WorkerDataConsistencyWithDeduplication < RuboCop::Cop::Base
+        include CodeReuseHelpers
+        extend AutoCorrector
+
+        HELP_LINK = 'https://docs.gitlab.com/ee/development/sidekiq_style_guide.html#scheduling-jobs-in-the-future'
+        REPLACEMENT = ', including_scheduled: true'
+        DEFAULT_STRATEGY = ':until_executing'
+
+        MSG = <<~MSG
+          Workers that declare either `:sticky` or `:delayed` data consistency become eligible for database load-balancing.
+          In both cases, jobs are enqueued with a short delay.
+
+          If you do want to deduplicate jobs that utilize load-balancing, you need to specify including_scheduled: true
+          argument when defining deduplication strategy.
+
+          See #{HELP_LINK} for a more detailed explanation of these settings.
+        MSG
+
+        def_node_search :application_worker?, <<~PATTERN
+          `(send nil? :include (const nil? :ApplicationWorker))
+        PATTERN
+
+        def_node_search :idempotent_worker?, <<~PATTERN
+          `(send nil? :idempotent!)
+        PATTERN
+
+        def_node_search :data_consistency_defined?, <<~PATTERN
+          `(send nil? :data_consistency (sym {:sticky :delayed }))
+        PATTERN
+
+        def_node_matcher :including_scheduled?, <<~PATTERN
+          `(hash <(pair (sym :including_scheduled) (%1)) ...>)
+        PATTERN
+
+        def_node_matcher :deduplicate_strategy?, <<~PATTERN
+          `(send nil? :deduplicate (sym $_) $(...)?)
+        PATTERN
+
+        def on_class(node)
+          return unless in_worker?(node)
+          return unless application_worker?(node)
+          return unless idempotent_worker?(node)
+          return unless data_consistency_defined?(node)
+
+          @strategy, options = deduplicate_strategy?(node)
+          including_scheduled = false
+          if options
+            @deduplicate_options = options[0]
+            including_scheduled = including_scheduled?(@deduplicate_options, :true) # rubocop:disable Lint/BooleanSymbol
+          end
+
+          @offense = !(including_scheduled || @strategy == :none)
+        end
+
+        def on_send(node)
+          return unless offense
+
+          if node.children[1] == :deduplicate
+            add_offense(node.loc.expression) do |corrector|
+              autocorrect_deduplicate_strategy(node, corrector)
+            end
+          elsif node.children[1] == :idempotent! && !strategy
+            add_offense(node.loc.expression) do |corrector|
+              autocorrect_missing_deduplicate_strategy(node, corrector)
+            end
+          end
+        end
+
+        private
+
+        attr_reader :offense, :deduplicate_options, :strategy
+
+        def autocorrect_deduplicate_with_options(corrector)
+          if including_scheduled?(deduplicate_options, :false) # rubocop:disable Lint/BooleanSymbol
+            replacement = deduplicate_options.source.sub("including_scheduled: false", "including_scheduled: true")
+            corrector.replace(deduplicate_options.loc.expression, replacement)
+          else
+            corrector.insert_after(deduplicate_options.loc.expression, REPLACEMENT)
+          end
+        end
+
+        def autocorrect_deduplicate_without_options(node, corrector)
+          corrector.insert_after(node.loc.expression, REPLACEMENT)
+        end
+
+        def autocorrect_missing_deduplicate_strategy(node, corrector)
+          indent_found = node.source_range.source_line =~ /^( +)/
+          # Get indentation size
+          whitespaces = Regexp.last_match(1).size if indent_found
+          replacement = "deduplicate #{DEFAULT_STRATEGY}#{REPLACEMENT}\n"
+          # Add indentation in the end since we are inserting a whole line before idempotent!
+          replacement += ' ' * whitespaces.to_i
+          corrector.insert_before(node.source_range, replacement)
+        end
+
+        def autocorrect_deduplicate_strategy(node, corrector)
+          if deduplicate_options
+            autocorrect_deduplicate_with_options(corrector)
+          else
+            autocorrect_deduplicate_without_options(node, corrector)
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/rubocop/cop/worker_data_consistency.rb b/rubocop/cop/worker_data_consistency.rb
deleted file mode 100644
index e9047750f3e0..000000000000
--- a/rubocop/cop/worker_data_consistency.rb
+++ /dev/null
@@ -1,63 +0,0 @@
-# frozen_string_literal: true
-
-require_relative '../code_reuse_helpers'
-
-module RuboCop
-  module Cop
-    # This cop checks for a call to `data_consistency` to exist in Sidekiq workers.
-    #
-    # @example
-    #
-    # # bad
-    # class BadWorker
-    #   def perform
-    #   end
-    # end
-    #
-    # # good
-    # class GoodWorker
-    #   data_consistency :delayed
-    #
-    #   def perform
-    #   end
-    # end
-    #
-    class WorkerDataConsistency < RuboCop::Cop::Cop
-      include CodeReuseHelpers
-
-      HELP_LINK = 'https://docs.gitlab.com/ee/development/sidekiq_style_guide.html#job-data-consistency-strategies'
-
-      MSG = <<~MSG
-        Should define data_consistency expectation.
-
-        It is encouraged for workers to use database replicas as much as possible by declaring
-        data_consistency to use the :delayed or :sticky modes. Mode :always will result in the
-        worker always hitting the primary database node for both reads and writes, which limits
-        scalability.
-
-        Some guidelines:
-        
-        1. If your worker mostly writes or reads its own writes, use mode :always. TRY TO AVOID THIS.
-        2. If your worker performs mostly reads and can tolerate small delays, use mode :delayed.
-        3. If your worker performs mostly reads but cannot tolerate any delays, use mode :sticky.
-
-        See #{HELP_LINK} for a more detailed explanation of these settings.
-      MSG
-
-      def_node_search :application_worker?, <<~PATTERN
-      `(send nil? :include (const nil? :ApplicationWorker))
-      PATTERN
-
-      def_node_search :data_consistency_defined?, <<~PATTERN
-        `(send nil? :data_consistency ...)
-      PATTERN
-
-      def on_class(node)
-        return unless in_worker?(node) && application_worker?(node)
-        return if data_consistency_defined?(node)
-
-        add_offense(node, location: :expression)
-      end
-    end
-  end
-end
diff --git a/spec/rubocop/cop/worker_data_consistency_spec.rb b/spec/rubocop/cop/sidekiq_load_balancing/worker_data_consistency_spec.rb
similarity index 87%
rename from spec/rubocop/cop/worker_data_consistency_spec.rb
rename to spec/rubocop/cop/sidekiq_load_balancing/worker_data_consistency_spec.rb
index 5fa42bf2b87d..cf8d0d1b66f1 100644
--- a/spec/rubocop/cop/worker_data_consistency_spec.rb
+++ b/spec/rubocop/cop/sidekiq_load_balancing/worker_data_consistency_spec.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true
 
 require 'fast_spec_helper'
-require_relative '../../../rubocop/cop/worker_data_consistency'
+require_relative '../../../../rubocop/cop/sidekiq_load_balancing/worker_data_consistency'
 
-RSpec.describe RuboCop::Cop::WorkerDataConsistency do
+RSpec.describe RuboCop::Cop::SidekiqLoadBalancing::WorkerDataConsistency do
   subject(:cop) { described_class.new }
 
   before do
diff --git a/spec/rubocop/cop/sidekiq_load_balancing/worker_data_consistency_with_deduplication_spec.rb b/spec/rubocop/cop/sidekiq_load_balancing/worker_data_consistency_with_deduplication_spec.rb
new file mode 100644
index 000000000000..6e7212b1002f
--- /dev/null
+++ b/spec/rubocop/cop/sidekiq_load_balancing/worker_data_consistency_with_deduplication_spec.rb
@@ -0,0 +1,166 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+require 'rspec-parameterized'
+require_relative '../../../../rubocop/cop/sidekiq_load_balancing/worker_data_consistency_with_deduplication'
+
+RSpec.describe RuboCop::Cop::SidekiqLoadBalancing::WorkerDataConsistencyWithDeduplication do
+  using RSpec::Parameterized::TableSyntax
+
+  subject(:cop) { described_class.new }
+
+  before do
+    allow(cop)
+      .to receive(:in_worker?)
+      .and_return(true)
+  end
+
+  where(:data_consistency) { %i[delayed sticky] }
+
+  with_them do
+    let(:strategy) { described_class::DEFAULT_STRATEGY }
+    let(:corrected) do
+      <<~CORRECTED
+      class SomeWorker
+        include ApplicationWorker
+
+        data_consistency :#{data_consistency}
+
+        deduplicate #{strategy}, including_scheduled: true
+        idempotent!
+      end
+      CORRECTED
+    end
+
+    context 'when deduplication strategy is not explicitly set' do
+      it 'registers an offense and corrects using default strategy' do
+        expect_offense(<<~CODE)
+          class SomeWorker
+            include ApplicationWorker
+
+            data_consistency :#{data_consistency}
+
+            idempotent!
+            ^^^^^^^^^^^ Workers that declare either `:sticky` or `:delayed` data consistency [...]
+          end
+        CODE
+
+        expect_correction(corrected)
+      end
+
+      context 'when identation is different' do
+        let(:corrected) do
+          <<~CORRECTED
+            class SomeWorker
+                include ApplicationWorker
+
+                data_consistency :#{data_consistency}
+
+                deduplicate #{strategy}, including_scheduled: true
+                idempotent!
+            end
+          CORRECTED
+        end
+
+        it 'registers an offense and corrects with correct identation' do
+          expect_offense(<<~CODE)
+            class SomeWorker
+                include ApplicationWorker
+
+                data_consistency :#{data_consistency}
+
+                idempotent!
+                ^^^^^^^^^^^ Workers that declare either `:sticky` or `:delayed` data consistency [...]
+            end
+          CODE
+
+          expect_correction(corrected)
+        end
+      end
+    end
+
+    context 'when deduplication strategy does not include including_scheduling option' do
+      let(:strategy) { ':until_executed' }
+
+      it 'registers an offense and corrects' do
+        expect_offense(<<~CODE)
+          class SomeWorker
+            include ApplicationWorker
+
+            data_consistency :#{data_consistency}
+
+            deduplicate :until_executed
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Workers that declare either `:sticky` or `:delayed` data consistency [...]
+            idempotent!
+          end
+        CODE
+
+        expect_correction(corrected)
+      end
+    end
+
+    context 'when deduplication strategy has including_scheduling option disabled' do
+      let(:strategy) { ':until_executed' }
+
+      it 'registers an offense and corrects' do
+        expect_offense(<<~CODE)
+          class SomeWorker
+            include ApplicationWorker
+
+            data_consistency :#{data_consistency}
+
+            deduplicate :until_executed, including_scheduled: false
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Workers that declare either `:sticky` or `:delayed` data consistency [...]
+            idempotent!
+          end
+        CODE
+
+        expect_correction(corrected)
+      end
+    end
+
+    context "when deduplication strategy is :none" do
+      it 'does not register an offense' do
+        expect_no_offenses(<<~CODE)
+          class SomeWorker
+            include ApplicationWorker
+
+            data_consistency :always
+
+            deduplicate :none
+            idempotent!
+          end
+        CODE
+      end
+    end
+
+    context "when deduplication strategy has including_scheduling option enabled" do
+      it 'does not register an offense' do
+        expect_no_offenses(<<~CODE)
+          class SomeWorker
+            include ApplicationWorker
+
+            data_consistency :always
+
+            deduplicate :until_executing, including_scheduled: true
+            idempotent!
+          end
+        CODE
+      end
+    end
+  end
+
+  context "data_consistency: :always" do
+    it 'does not register an offense' do
+      expect_no_offenses(<<~CODE)
+        class SomeWorker
+          include ApplicationWorker
+
+          data_consistency :always
+
+          idempotent!
+        end
+      CODE
+    end
+  end
+end
-- 
GitLab