From a0d218b1897f6db3ca8d24934bc3b3f9f88fa616 Mon Sep 17 00:00:00 2001
From: Andrejs Cunskis <acunskis@gitlab.com>
Date: Wed, 17 Nov 2021 23:53:27 +0000
Subject: [PATCH] E2E: reliable test report

---
 .gitlab/ci/qa-report.gitlab-ci.yml    |  15 ++
 .gitlab/ci/rules.gitlab-ci.yml        |   5 +
 qa/Gemfile                            |   2 +
 qa/Gemfile.lock                       |   7 +-
 qa/Rakefile                           |   1 +
 qa/qa/tools/reliable_report.rb        | 234 ++++++++++++++++++++++++++
 qa/spec/tools/reliable_report_spec.rb | 145 ++++++++++++++++
 qa/tasks/reliable_report.rake         |  21 +++
 8 files changed, 428 insertions(+), 2 deletions(-)
 create mode 100644 .gitlab/ci/qa-report.gitlab-ci.yml
 create mode 100644 qa/qa/tools/reliable_report.rb
 create mode 100644 qa/spec/tools/reliable_report_spec.rb
 create mode 100644 qa/tasks/reliable_report.rake

diff --git a/.gitlab/ci/qa-report.gitlab-ci.yml b/.gitlab/ci/qa-report.gitlab-ci.yml
new file mode 100644
index 0000000000000..61cbcfd58da4e
--- /dev/null
+++ b/.gitlab/ci/qa-report.gitlab-ci.yml
@@ -0,0 +1,15 @@
+test-reliability-report: # prints spec reliability reports and posts them to slack
+  extends:
+    - .qa:rules:reliable-reports:schedule
+  image:
+    name: ${CI_REGISTRY_IMAGE}/gitlab-ee-qa:${CI_DEFAULT_BRANCH}
+    entrypoint: [""]
+  before_script:
+    - cd /home/gitlab/qa
+  script:
+    - echo "Generate report for 'staging-full' runs"
+    - bundle exec rake "reliable_spec_report[staging-full,30,true]" # args: run_type, range in days, create_slack_report
+    - bundle exec rake "unreliable_spec_report[staging-full,30,true]"
+    - echo "Generate report for 'package-and-qa' runs"
+    - bundle exec rake "reliable_spec_report[package-and-qa,30,true]"
+    - bundle exec rake "unreliable_spec_report[package-and-qa,30,true]"
diff --git a/.gitlab/ci/rules.gitlab-ci.yml b/.gitlab/ci/rules.gitlab-ci.yml
index 183acceadf98d..d10d9cb63ac83 100644
--- a/.gitlab/ci/rules.gitlab-ci.yml
+++ b/.gitlab/ci/rules.gitlab-ci.yml
@@ -780,6 +780,11 @@
       changes: *feature-flag-development-config-patterns
       allow_failure: true
 
+.qa:rules:reliable-reports:schedule:
+  rules:
+    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH && $CI_PIPELINE_SOURCE == "schedule" && $QA_RELIABLE_REPORT == "true"'
+      allow_failure: true
+
 ###############
 # Rails rules #
 ###############
diff --git a/qa/Gemfile b/qa/Gemfile
index 692bee5cdaf5a..498d05b225408 100644
--- a/qa/Gemfile
+++ b/qa/Gemfile
@@ -25,6 +25,8 @@ gem 'octokit', '~> 4.21'
 gem 'webdrivers', '~> 5.0'
 gem 'zeitwerk', '~> 2.4'
 gem 'influxdb-client', '~> 1.17'
+gem 'terminal-table', '~> 1.8', require: false
+gem 'slack-notifier', '~> 2.4', require: false
 
 gem 'chemlab', '~> 0.9'
 gem 'chemlab-library-www-gitlab-com', '~> 0.1'
diff --git a/qa/Gemfile.lock b/qa/Gemfile.lock
index 733041524e93c..2b5b5e368cfc6 100644
--- a/qa/Gemfile.lock
+++ b/qa/Gemfile.lock
@@ -27,7 +27,7 @@ GEM
       oj (>= 3.10, < 4)
       require_all (>= 2, < 4)
       uuid (>= 2.3, < 3)
-    ast (2.4.1)
+    ast (2.4.2)
     binding_ninja (0.2.3)
     byebug (9.1.0)
     capybara (3.35.3)
@@ -141,7 +141,7 @@ GEM
     parallel (1.19.2)
     parallel_tests (2.29.0)
       parallel
-    parser (2.7.1.4)
+    parser (3.0.2.0)
       ast (~> 2.4.1)
     proc_to_ast (0.1.0)
       coderay
@@ -203,6 +203,7 @@ GEM
       childprocess (>= 0.5, < 5.0)
       rexml (~> 3.2, >= 3.2.5)
       rubyzip (>= 1.2.2)
+    slack-notifier (2.4.0)
     systemu (2.6.5)
     table_print (1.5.7)
     terminal-table (1.8.0)
@@ -265,6 +266,8 @@ DEPENDENCIES
   rspec_junit_formatter (~> 0.4.1)
   ruby-debug-ide (~> 0.7.0)
   selenium-webdriver (~> 4.0)
+  slack-notifier (~> 2.4)
+  terminal-table (~> 1.8)
   timecop (~> 0.9.1)
   webdrivers (~> 5.0)
   zeitwerk (~> 2.4)
diff --git a/qa/Rakefile b/qa/Rakefile
index f24c81a9ec2d8..57360e98ca2ff 100644
--- a/qa/Rakefile
+++ b/qa/Rakefile
@@ -2,6 +2,7 @@
 # rubocop:disable Rails/RakeEnvironment
 
 load 'tasks/webdrivers.rake'
+load 'tasks/reliable_report.rake'
 
 require_relative 'qa/tools/revoke_all_personal_access_tokens'
 require_relative 'qa/tools/delete_subgroups'
diff --git a/qa/qa/tools/reliable_report.rb b/qa/qa/tools/reliable_report.rb
new file mode 100644
index 0000000000000..9d2079171c161
--- /dev/null
+++ b/qa/qa/tools/reliable_report.rb
@@ -0,0 +1,234 @@
+# frozen_string_literal: true
+
+require "influxdb-client"
+require "terminal-table"
+require "slack-notifier"
+
+module QA
+  module Tools
+    class ReliableReport
+      def initialize(run_type, range = 30)
+        @results = 10 # number of specs listed in each report
+        @slack_channel = "#quality-reports"
+        @range = range || 30 # rake passes an explicit nil when the range argument is omitted
+        @run_type = run_type
+        @stable_title = "Top #{results} stable specs for past #{@range} days in '#{run_type}' runs"
+        @unstable_title = "Top #{results} unstable reliable specs for past #{@range} days in '#{run_type}' runs"
+      end
+
+      # Print a titled table of the top stable specs to stdout
+      #
+      # @return [void]
+      def show_top_stable
+        rows = top_stable.map do |name, stats|
+          [name_column(name, stats[:file]), *table_params(stats.values)]
+        end
+        puts terminal_table(rows: rows, title: stable_title)
+      end
+
+      # Post the top stable spec report to slack
+      # The table is sent in chunks of 5 specs due to max char limitation
+      #
+      # @return [void]
+      def notify_top_stable
+        chunks = top_stable.each_slice(5).map do |specs|
+          rows = specs.map { |name, stats| [name_column(name, stats[:file]), *table_params(stats.values)] }
+          terminal_table(rows: rows)
+        end
+
+        puts "\nSending top stable spec report to #{slack_channel} slack channel"
+        message_opts = { icon_emoji: ":mtg_green:", username: "Stable Spec Report" }
+        notifier.post(text: "*#{stable_title}*", **message_opts)
+
+        chunks.each { |chunk| notifier.post(text: "```#{chunk}```", **message_opts) }
+      end
+
+      # Print top unstable reliable specs, or a notice when there are none
+      #
+      # @return [void]
+      def show_top_unstable
+        return puts("No unstable tests present!") if top_unstable_reliable.empty?
+
+        rows = top_unstable_reliable.map do |name, stats|
+          [name_column(name, stats[:file]), *table_params(stats.values)]
+        end
+        puts terminal_table(rows: rows, title: unstable_title)
+      end
+
+      # Post the top unstable reliable spec report to slack, unless there is nothing to report
+      # The table is sent in chunks of 5 specs due to max char limitation
+      #
+      # @return [void]
+      def notify_top_unstable
+        return puts("No unstable tests present!") if top_unstable_reliable.empty?
+
+        chunks = top_unstable_reliable.each_slice(5).map do |specs|
+          rows = specs.map { |name, stats| [name_column(name, stats[:file]), *table_params(stats.values)] }
+          terminal_table(rows: rows)
+        end
+
+        puts "\nSending top unstable reliable spec report to #{slack_channel} slack channel"
+        message_opts = { icon_emoji: ":sadpanda:", username: "Unstable Spec Report" }
+        notifier.post(text: "*#{unstable_title}*", **message_opts)
+
+        chunks.each { |chunk| notifier.post(text: "```#{chunk}```", **message_opts) }
+      end
+
+      private
+
+      attr_reader :results,
+                  :slack_channel,
+                  :range,
+                  :run_type,
+                  :stable_title,
+                  :unstable_title
+
+      # Top stable specs: lowest failure rate first, more runs first on equal rate
+      #
+      # @return [Hash]
+      def top_stable
+        @top_stable ||= runs(reliable: false).sort_by { |_, v| [v[:failure_rate], -v[:runs]] }.first(results).to_h
+      end
+
+      # Reliable specs with a non-zero failure rate, highest failure rate first
+      #
+      # @return [Hash]
+      def top_unstable_reliable
+        @top_unstable_reliable ||= runs(reliable: true)
+          .reject { |_, v| v[:failure_rate].zero? }
+          .sort_by { |_, v| -v[:failure_rate] }
+          .first(results).to_h
+      end
+
+      # Build a terminal table with the report headings and the all_separators style
+      #
+      # @param [Array<Array>] rows
+      # @param [String, nil] title
+      # @return [Terminal::Table]
+      def terminal_table(rows:, title: nil)
+        headings = ["name", "runs", "failed", "failure rate"]
+        table_style = { all_separators: true }
+
+        Terminal::Table.new(headings: headings, style: table_style, title: title, rows: rows)
+      end
+
+      # Table row values: elements 1 and 2 of the spec values plus the last one as a percentage
+      #
+      # @param [Array] parameters
+      # @return [Array]
+      def table_params(parameters)
+        parameters[1..2].to_a << "#{parameters.last}%"
+      end
+
+      # Name column value: names longer than 100 characters are wrapped at spaces,
+      # the file line below the name is padded to 110 characters
+      #
+      # @param [String] name
+      # @param [String] file
+      # @return [String]
+      def name_column(name, file)
+        name_lines = name.length > 100 ? "#{name} ".scan(/.{1,100} /).map(&:strip) : [name]
+        file_line = "file: '#{file}'".ljust(110)
+
+        "name: '#{name_lines.join("\n")}'\n#{file_line}"
+      end
+
+      # Test executions grouped by spec name
+      #
+      # @param [Boolean] reliable fetch executions of reliable specs when true, of non-reliable specs when false
+      # @return [Hash] spec name => { file:, runs:, failed:, failure_rate: }
+      def runs(reliable:)
+        scope = reliable ? "reliable " : ""
+        puts("Fetching data on #{scope}test execution for past #{range} days in '#{run_type}' runs")
+        puts
+
+        query_api.query(query: query(reliable)).values.each_with_object({}) do |table, result|
+          records = table.records
+          name = records.last.values["name"]
+          file = records.last.values["file_path"].split("/").last
+          total = records.count
+          failed = records.count { |r| r.values["status"] == "failed" }
+          failure_rate = (failed.to_f / total) * 100
+          result[name] = {
+            file: file,
+            runs: total,
+            failed: failed,
+            failure_rate: failure_rate.zero? ? 0 : failure_rate.round(2) # 0 renders as "0%", not "0.0%"
+          }
+        end
+      end
+
+      # Flux query for non-pending, non-quarantined, non merge request executions of the run type within range
+      #
+      # @param [Boolean] reliable value the `reliable` column is matched against
+      # @return [String]
+      def query(reliable)
+        <<~QUERY
+        from(bucket: "e2e-test-stats")
+          |> range(start: -#{range}d)
+          |> filter(fn: (r) => r._measurement == "test-stats" and
+            r.run_type == "#{run_type}" and
+            r.status != "pending" and
+            r.merge_request == "false" and
+            r.quarantined == "false" and
+            r.reliable == "#{reliable}" and
+            r._field == "id"
+          )
+          |> group(columns: ["name"])
+        QUERY
+      end
+
+      # Memoized query api created from the InfluxDb client
+      #
+      # @return [InfluxDB2::QueryApi]
+      def query_api
+        @query_api ||= influx_client.create_query_api
+      end
+
+      # Memoized InfluxDb client for the "e2e-test-stats" bucket of the "gitlab-qa" org
+      #
+      # @return [InfluxDB2::Client]
+      def influx_client
+        @influx_client ||= InfluxDB2::Client.new(
+          influxdb_url,
+          influxdb_token,
+          bucket: "e2e-test-stats",
+          org: "gitlab-qa",
+          precision: InfluxDB2::WritePrecision::NANOSECOND
+        )
+      end
+
+      # Memoized slack notifier posting to the report channel through the webhook url
+      #
+      # @return [Slack::Notifier]
+      def notifier
+        @notifier ||= Slack::Notifier.new(
+          slack_webhook_url,
+          channel: slack_channel,
+          username: "Reliable spec reporter"
+        )
+      end
+
+      # InfluxDb instance url, read from the QA_INFLUXDB_URL environment variable
+      # @raise [RuntimeError] when the variable is not set
+      # @return [String]
+      def influxdb_url
+        @influxdb_url ||= ENV["QA_INFLUXDB_URL"] || raise("Missing QA_INFLUXDB_URL environment variable")
+      end
+
+      # InfluxDb token, read from the QA_INFLUXDB_TOKEN environment variable
+      # @raise [RuntimeError] when the variable is not set
+      # @return [String]
+      def influxdb_token
+        @influxdb_token ||= ENV["QA_INFLUXDB_TOKEN"] || raise("Missing QA_INFLUXDB_TOKEN environment variable")
+      end
+
+      # Slack webhook url, read from the CI_SLACK_WEBHOOK_URL environment variable
+      # @raise [RuntimeError] when the variable is not set
+      # @return [String]
+      def slack_webhook_url
+        @slack_webhook_url ||= ENV["CI_SLACK_WEBHOOK_URL"] || raise("Missing CI_SLACK_WEBHOOK_URL environment variable")
+      end
+    end
+  end
+end
diff --git a/qa/spec/tools/reliable_report_spec.rb b/qa/spec/tools/reliable_report_spec.rb
new file mode 100644
index 0000000000000..c7d4d28fb21fc
--- /dev/null
+++ b/qa/spec/tools/reliable_report_spec.rb
@@ -0,0 +1,145 @@
+# frozen_string_literal: true
+
+describe QA::Tools::ReliableReport do
+  include QA::Support::Helpers::StubEnv
+
+  subject(:reporter) { described_class.new(run_type, range) }
+
+  let(:slack_notifier) { instance_double("Slack::Notifier", post: nil) }
+  let(:influx_client) { instance_double("InfluxDB2::Client", create_query_api: query_api) }
+  let(:query_api) { instance_double("InfluxDB2::QueryApi") }
+
+  let(:slack_channel) { "#quality-reports" }
+  let(:run_type) { "package-and-qa" }
+  let(:range) { 30 }
+  let(:results) { 10 }
+
+  let(:runs) { { 0 => stable_spec, 1 => unstable_spec } }
+
+  let(:stable_spec) do
+    spec_values = { "name" => "stable spec", "status" => "passed", "file_path" => "some/spec.rb" }
+    instance_double(
+      "InfluxDB2::FluxTable",
+      records: [
+        instance_double("InfluxDB2::FluxRecord", values: spec_values),
+        instance_double("InfluxDB2::FluxRecord", values: spec_values),
+        instance_double("InfluxDB2::FluxRecord", values: spec_values)
+      ]
+    )
+  end
+
+  let(:unstable_spec) do
+    spec_values = { "name" => "unstable spec", "status" => "failed", "file_path" => "some/spec.rb" }
+    instance_double(
+      "InfluxDB2::FluxTable",
+      records: [
+        instance_double("InfluxDB2::FluxRecord", values: { **spec_values, "status" => "passed" }),
+        instance_double("InfluxDB2::FluxRecord", values: spec_values),
+        instance_double("InfluxDB2::FluxRecord", values: spec_values)
+      ]
+    )
+  end
+
+  def flux_query(reliable) # expected query text; the query_api stub only matches this exact string
+    <<~QUERY
+        from(bucket: "e2e-test-stats")
+          |> range(start: -#{range}d)
+          |> filter(fn: (r) => r._measurement == "test-stats" and
+            r.run_type == "#{run_type}" and
+            r.status != "pending" and
+            r.merge_request == "false" and
+            r.quarantined == "false" and
+            r.reliable == "#{reliable}" and
+            r._field == "id"
+          )
+          |> group(columns: ["name"])
+    QUERY
+  end
+
+  # Expected table, built with the same headings and style options as the reporter
+  # @return [Terminal::Table]
+  def table(rows, title = nil)
+    headings = ["name", "runs", "failed", "failure rate"]
+    table_style = { all_separators: true }
+
+    Terminal::Table.new(headings: headings, style: table_style, title: title, rows: rows)
+  end
+
+  def name_column(spec_name)
+    # the file line is padded to 110 characters, as in the reporter's name column
+    padded_file = "file: 'spec.rb'".ljust(110)
+
+    "name: '#{spec_name}'\n#{padded_file}"
+  end
+
+  before do
+    stub_env("QA_INFLUXDB_URL", "url")
+    stub_env("QA_INFLUXDB_TOKEN", "token")
+    stub_env("CI_SLACK_WEBHOOK_URL", "slack_url")
+
+    allow(Slack::Notifier).to receive(:new).and_return(slack_notifier)
+    allow(InfluxDB2::Client).to receive(:new).and_return(influx_client)
+    allow(query_api).to receive(:query).with(query: query).and_return(runs)
+  end
+
+  context "with stable spec report" do
+    let(:query) { flux_query(false) }
+    let(:fetch_message) { "Fetching data on test execution for past #{range} days in '#{run_type}' runs" }
+    let(:slack_send_message) { "Sending top stable spec report to #{slack_channel} slack channel" }
+    let(:title) { "Top #{results} stable specs for past #{range} days in '#{run_type}' runs" }
+    let(:rows) do
+      [
+        [name_column("stable spec"), 3, 0, "0%"],
+        [name_column("unstable spec"), 3, 2, "66.67%"]
+      ]
+    end
+
+    it "prints top stable spec report to console" do
+      expect { reporter.show_top_stable }.to output("#{fetch_message}\n\n#{table(rows, title)}\n").to_stdout
+    end
+
+    it "sends top stable spec report to slack" do
+      slack_args = { icon_emoji: ":mtg_green:", username: "Stable Spec Report" }
+
+      expect { reporter.notify_top_stable }.to output("#{fetch_message}\n\n\n#{slack_send_message}\n").to_stdout
+      expect(slack_notifier).to have_received(:post).with(text: "*#{title}*", **slack_args)
+      expect(slack_notifier).to have_received(:post).with(text: "```#{table(rows)}```", **slack_args)
+    end
+  end
+
+  context "with unstable spec report" do
+    let(:query) { flux_query(true) }
+    let(:fetch_message) { "Fetching data on reliable test execution for past #{range} days in '#{run_type}' runs" }
+    let(:slack_send_message) { "Sending top unstable reliable spec report to #{slack_channel} slack channel" }
+    let(:title) { "Top #{results} unstable reliable specs for past #{range} days in '#{run_type}' runs" }
+    let(:rows) { [[name_column("unstable spec"), 3, 2, "66.67%"]] }
+
+    it "prints top unstable spec report to console" do
+      expect { reporter.show_top_unstable }.to output("#{fetch_message}\n\n#{table(rows, title)}\n").to_stdout
+    end
+
+    it "sends top unstable reliable spec report to slack" do
+      slack_args = { icon_emoji: ":sadpanda:", username: "Unstable Spec Report" }
+
+      expect { reporter.notify_top_unstable }.to output("#{fetch_message}\n\n\n#{slack_send_message}\n").to_stdout
+      expect(slack_notifier).to have_received(:post).with(text: "*#{title}*", **slack_args)
+      expect(slack_notifier).to have_received(:post).with(text: "```#{table(rows)}```", **slack_args)
+    end
+  end
+
+  context "without unstable reliable specs" do
+    let(:query) { flux_query(true) }
+    let(:runs) { { 0 => stable_spec } }
+    let(:fetch_message) { "Fetching data on reliable test execution for past #{range} days in '#{run_type}' runs" }
+    let(:no_result_message) { "No unstable tests present!" }
+
+    it "prints no result message to console" do
+      expect { reporter.show_top_unstable }.to output("#{fetch_message}\n\n#{no_result_message}\n").to_stdout
+    end
+
+    it "skips slack notification" do
+      expect { reporter.notify_top_unstable }.to output("#{fetch_message}\n\n#{no_result_message}\n").to_stdout
+      expect(slack_notifier).not_to have_received(:post)
+    end
+  end
+end
diff --git a/qa/tasks/reliable_report.rake b/qa/tasks/reliable_report.rake
new file mode 100644
index 0000000000000..204c959093aa0
--- /dev/null
+++ b/qa/tasks/reliable_report.rake
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+# rubocop:disable Rails/RakeEnvironment
+
+require_relative "../qa/tools/reliable_report"
+
+desc "Fetch top most reliable specs"
+task :reliable_spec_report, [:run_type, :range, :create_slack_report] do |_task, args|
+  # omitted task arguments are nil, so defaults for run_type and range are applied here
+  report = QA::Tools::ReliableReport.new(args[:run_type] || "package-and-qa", args[:range] || 30)
+  report.show_top_stable
+  report.notify_top_stable if args[:create_slack_report] == 'true'
+end
+
+desc "Fetch top most unstable reliable specs"
+task :unreliable_spec_report, [:run_type, :range, :create_slack_report] do |_task, args|
+  # omitted task arguments are nil, so defaults for run_type and range are applied here
+  report = QA::Tools::ReliableReport.new(args[:run_type] || "package-and-qa", args[:range] || 30)
+  report.show_top_unstable
+  report.notify_top_unstable if args[:create_slack_report] == 'true'
+end
+# rubocop:enable Rails/RakeEnvironment
-- 
GitLab