From d4c807d1c17102467113b34e4399ac13bf6858a6 Mon Sep 17 00:00:00 2001
From: Steve Abrams <sabrams@gitlab.com>
Date: Wed, 1 Feb 2023 19:00:14 +0000
Subject: [PATCH] Create issue in release/tasks for broken stable

When a stable branch pipeline fails, an incident
issue is created in the release/tasks project.
---
 .gitlab-ci.yml                                |   5 +
 .gitlab/ci/notify.gitlab-ci.yml               |  13 +-
 scripts/api/commit_merge_requests.rb          |  29 +++++
 scripts/create-pipeline-failure-incident.rb   |  90 ++++++++++++-
 .../generate-failed-pipeline-slack-message.rb |   2 +-
 .../scripts/api/commit_merge_requests_spec.rb |  30 +++++
 .../create_pipeline_failure_incident_spec.rb  | 120 ++++++++++++++++++
 7 files changed, 280 insertions(+), 9 deletions(-)
 create mode 100644 scripts/api/commit_merge_requests.rb
 create mode 100644 spec/scripts/api/commit_merge_requests_spec.rb
 create mode 100644 spec/scripts/create_pipeline_failure_incident_spec.rb

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 9fb604e7a25fb..abd91495d5e2c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -35,6 +35,8 @@ default:
 .default-branch-incident-variables: &default-branch-incident-variables
   CREATE_INCIDENT_FOR_PIPELINE_FAILURE: "true"
   NOTIFY_PIPELINE_FAILURE_CHANNEL: "master-broken"
+  BROKEN_BRANCH_INCIDENTS_PROJECT: "gitlab-org/quality/engineering-productivity/master-broken-incidents"
+  BROKEN_BRANCH_INCIDENTS_PROJECT_TOKEN: "${BROKEN_MASTER_INCIDENTS_PROJECT_TOKEN}"
 
 workflow:
   name: '$PIPELINE_NAME'
@@ -107,6 +109,9 @@ workflow:
         <<: *ruby2-variables
         NOTIFY_PIPELINE_FAILURE_CHANNEL: "releases"
         PIPELINE_NAME: 'Ruby 2 $CI_COMMIT_BRANCH branch pipeline'
+        CREATE_INCIDENT_FOR_PIPELINE_FAILURE: "true"
+        BROKEN_BRANCH_INCIDENTS_PROJECT: "gitlab-org/release/tasks"
+        BROKEN_BRANCH_INCIDENTS_PROJECT_TOKEN: "${BROKEN_STABLE_INCIDENTS_PROJECT_TOKEN}"
     - if: '$CI_COMMIT_BRANCH =~ /^\d+-\d+-auto-deploy-\d+$/'
       variables:
         <<: *ruby2-variables
diff --git a/.gitlab/ci/notify.gitlab-ci.yml b/.gitlab/ci/notify.gitlab-ci.yml
index 84fb5a55ed19a..bdd7979126f2d 100644
--- a/.gitlab/ci/notify.gitlab-ci.yml
+++ b/.gitlab/ci/notify.gitlab-ci.yml
@@ -43,8 +43,9 @@ notify-pipeline-failure:
     - .notify:rules:notify-pipeline-failure
   image: ${GITLAB_DEPENDENCY_PROXY_ADDRESS}ruby:${RUBY_VERSION}
   variables:
-    BROKEN_MASTER_INCIDENTS_PROJECT: "gitlab-org/quality/engineering-productivity/master-broken-incidents"
-    BROKEN_MASTER_INCIDENT_JSON: "${CI_PROJECT_DIR}/incident.json"
+    INCIDENT_PROJECT: "${BROKEN_BRANCH_INCIDENTS_PROJECT}"
+    BROKEN_BRANCH_PROJECT_TOKEN: "${BROKEN_BRANCH_INCIDENTS_PROJECT_TOKEN}"
+    INCIDENT_JSON: "${CI_PROJECT_DIR}/incident.json"
     SLACK_CHANNEL: "${NOTIFY_PIPELINE_FAILURE_CHANNEL}"
     FAILED_PIPELINE_SLACK_MESSAGE_FILE: "${CI_PROJECT_DIR}/failed_pipeline_slack_message.json"
   before_script:
@@ -54,17 +55,17 @@ notify-pipeline-failure:
   script:
     - |
       if [[ "${CREATE_INCIDENT_FOR_PIPELINE_FAILURE}" == "true" ]]; then
-        scripts/create-pipeline-failure-incident.rb -p ${BROKEN_MASTER_INCIDENTS_PROJECT} -f ${BROKEN_MASTER_INCIDENT_JSON} -t ${BROKEN_MASTER_INCIDENTS_PROJECT_TOKEN};
-        echosuccess "Created incident $(jq '.web_url' ${BROKEN_MASTER_INCIDENT_JSON})";
+        scripts/create-pipeline-failure-incident.rb -p ${INCIDENT_PROJECT} -f ${INCIDENT_JSON} -t ${BROKEN_BRANCH_PROJECT_TOKEN};
+        echosuccess "Created incident $(jq '.web_url' ${INCIDENT_JSON})";
       fi
     - |
-      scripts/generate-failed-pipeline-slack-message.rb -i ${BROKEN_MASTER_INCIDENT_JSON} -f ${FAILED_PIPELINE_SLACK_MESSAGE_FILE};
+      scripts/generate-failed-pipeline-slack-message.rb -i ${INCIDENT_JSON} -f ${FAILED_PIPELINE_SLACK_MESSAGE_FILE};
       curl -X POST -H 'Content-Type: application/json' --data @${FAILED_PIPELINE_SLACK_MESSAGE_FILE} "$CI_SLACK_WEBHOOK_URL" ||
         scripts/slack ${SLACK_CHANNEL} "☠️  Broken pipeline notification failed! ☠️  See ${CI_JOB_URL}" ci_failing "Failed pipeline reporter"
 
   artifacts:
     paths:
-      - ${BROKEN_MASTER_INCIDENT_JSON}
+      - ${INCIDENT_JSON}
       - ${FAILED_PIPELINE_SLACK_MESSAGE_FILE}
     when: always
     expire_in: 2 days
diff --git a/scripts/api/commit_merge_requests.rb b/scripts/api/commit_merge_requests.rb
new file mode 100644
index 0000000000000..3cf8dc87497da
--- /dev/null
+++ b/scripts/api/commit_merge_requests.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+require 'gitlab'
+require_relative 'default_options'
+
+# Looks up the merge requests related to a commit through the GitLab API client.
+class CommitMergeRequests
+  def initialize(options)
+    @project = options.fetch(:project)
+    @sha = options.fetch(:sha)
+
+    # A missing or nil api_token becomes '' to allow unauthenticated requests (for forks).
+    api_token = options[:api_token] || ''
+    warn "No API token given." if api_token.empty?
+
+    @client = Gitlab.client(
+      endpoint: options.fetch(:endpoint, API::DEFAULT_OPTIONS[:endpoint]),
+      private_token: api_token
+    )
+  end
+
+  def execute
+    client.commit_merge_requests(project, sha)
+  end
+
+  private
+
+  attr_reader :project, :sha, :client
+end
diff --git a/scripts/create-pipeline-failure-incident.rb b/scripts/create-pipeline-failure-incident.rb
index 1035a680291ce..2b86fac680de8 100755
--- a/scripts/create-pipeline-failure-incident.rb
+++ b/scripts/create-pipeline-failure-incident.rb
@@ -5,9 +5,10 @@
 require 'optparse'
 require 'json'
 
-require_relative 'api/pipeline_failed_jobs'
+require_relative 'api/commit_merge_requests'
 require_relative 'api/create_issue'
 require_relative 'api/create_issue_discussion'
+require_relative 'api/pipeline_failed_jobs'
 
 class CreatePipelineFailureIncident
   DEFAULT_OPTIONS = {
@@ -29,6 +30,8 @@ def execute
       labels: incident_labels
     }
 
+    payload[:assignee_ids] = assignee_ids if stable_branch_incident?
+
     CreateIssue.new(project: project, api_token: api_token).execute(payload).tap do |incident|
       CreateIssueDiscussion.new(project: project, api_token: api_token)
         .execute(issue_iid: incident.iid, body: "## Root Cause Analysis")
@@ -41,8 +44,18 @@ def execute
 
   attr_reader :project, :api_token
 
+  def stable_branch_incident?
+    /\A[\d-]+-stable(-ee)?\z/.match?(ENV['CI_COMMIT_REF_NAME'])
+  end
+
   def failed_jobs
-    @failed_jobs ||= PipelineFailedJobs.new(API::DEFAULT_OPTIONS.dup.merge(exclude_allowed_to_fail_jobs: true)).execute
+    @failed_jobs ||= PipelineFailedJobs.new(API::DEFAULT_OPTIONS.merge(exclude_allowed_to_fail_jobs: true)).execute
+  end
+
+  def merge_request
+    return @merge_request if defined?(@merge_request) # memoizes nil (no MR found) too
+
+    @merge_request = CommitMergeRequests.new(API::DEFAULT_OPTIONS.merge(sha: ENV['CI_COMMIT_SHA'])).execute.first
   end
 
   def now
@@ -63,6 +76,12 @@ def title
   end
 
   def description
+    return broken_stable_description_content if stable_branch_incident?
+
+    broken_master_description_content
+  end
+
+  def broken_master_description_content
     <<~MARKDOWN
     ## #{project_link} pipeline #{pipeline_link} failed
 
@@ -107,7 +126,62 @@ def description
     MARKDOWN
   end
 
+  def broken_stable_description_content
+    <<~MARKDOWN
+    ## #{project_link} pipeline #{pipeline_link} failed
+
+    **Branch: #{branch_link}**
+
+    **Commit: #{commit_link}**
+
+    **Merge Request: #{merge_request_link}**
+
+    **Triggered by** #{triggered_by_link} • **Source:** #{source} • **Duration:** #{pipeline_duration} minutes
+
+    **Failed jobs (#{failed_jobs.size}):**
+
+    #{failed_jobs_list}
+
+    ### General guidelines
+
+    A broken stable branch prevents patch releases from being built.
+    Fixing the pipeline is a priority to prevent any delays in releases.
+
+    The process in the [Broken `master` handbook guide](https://about.gitlab.com/handbook/engineering/workflow/#broken-master) can be referenced since much of that process also applies here.
+
+    ### Investigation
+
+    **Be sure to fill the `Timeline` for this incident.**
+
+    1. If the failure is new, and looks like a potential flaky failure, you can retry the failing job.
+      Make sure to mention the retry in the `Timeline` and leave a link to the retried job.
+    1. Search for similar master-broken issues in https://gitlab.com/gitlab-org/quality/engineering-productivity/master-broken-incidents/-/issues
+      1. If one exists, ask the DRI of the master-broken issue to cherry-pick any resulting merge requests into the stable branch
+
+    @gitlab-org/release/managers if the merge request author or maintainer is not available, this can be escalated using the dev-on-call process in the [#dev-escalation slack channel](https://gitlab.slack.com/archives/CLKLMSUR4).
+
+    ### Pre-resolution
+
+    If you believe that there's an easy resolution by either:
+
+    - Reverting a particular merge request.
+    - Making a quick fix (for example, one line or a few similar simple changes in a few lines).
+      You can create a merge request, assign to any available maintainer, and ping people that were involved/related to the introduction of the failure.
+      Additionally, a message can be posted in `#backend_maintainers` or `#frontend_maintainers` to get a maintainer take a look at the fix ASAP.
+    - Cherry picking a change that was used to fix a similar master-broken issue.
+
+    In all cases, make sure to add the ~"pipeline:expedite" label to speed up the `stable`-fixing pipelines.
+
+    ### Resolution
+
+    Add a comment to this issue describing how this incident could have been prevented earlier in the Merge Request pipeline (rather than the merge commit pipeline).
+
+    MARKDOWN
+  end
+
   def incident_labels
+    return ['release-blocker'] if stable_branch_incident?
+
     master_broken_label =
       if ENV['CI_PROJECT_NAME'] == 'gitlab-foss'
         'master:foss-broken'
@@ -118,6 +192,12 @@ def incident_labels
     DEFAULT_LABELS.dup << master_broken_label
   end
 
+  def assignee_ids
+    ids = [ENV['GITLAB_USER_ID'].to_i]
+    ids << merge_request['author']['id'].to_i if merge_request
+    ids
+  end
+
   def pipeline_link
     "[##{ENV['CI_PIPELINE_ID']}](#{ENV['CI_PIPELINE_URL']})"
   end
@@ -134,6 +214,12 @@ def commit_link
     "[#{ENV['CI_COMMIT_TITLE']}](#{ENV['CI_PROJECT_URL']}/-/commit/#{ENV['CI_COMMIT_SHA']})"
   end
 
+  def merge_request_link
+    return 'N/A' unless merge_request
+
+    "[#{merge_request['title']}](#{merge_request['web_url']})"
+  end
+
   def source
     "`#{ENV['CI_PIPELINE_SOURCE']}`"
   end
diff --git a/scripts/generate-failed-pipeline-slack-message.rb b/scripts/generate-failed-pipeline-slack-message.rb
index b695cdfdbeed2..eefdebd5db5ef 100755
--- a/scripts/generate-failed-pipeline-slack-message.rb
+++ b/scripts/generate-failed-pipeline-slack-message.rb
@@ -107,7 +107,7 @@ def incident_button_link
     if incident_exist?
       incident['web_url']
     else
-      "#{ENV['CI_SERVER_URL']}/#{ENV['BROKEN_MASTER_INCIDENTS_PROJECT']}/-/issues/new?" \
+      "#{ENV['CI_SERVER_URL']}/#{ENV['BROKEN_BRANCH_INCIDENTS_PROJECT']}/-/issues/new?" \
         "issuable_template=incident&issue%5Bissue_type%5D=incident"
     end
   end
diff --git a/spec/scripts/api/commit_merge_requests_spec.rb b/spec/scripts/api/commit_merge_requests_spec.rb
new file mode 100644
index 0000000000000..461e3e2e068bb
--- /dev/null
+++ b/spec/scripts/api/commit_merge_requests_spec.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+require_relative '../../../scripts/api/commit_merge_requests'
+
+RSpec.describe CommitMergeRequests, feature_category: :tooling do
+  describe '#execute' do
+    let(:options) do
+      {
+        sha: 'asdf1234',
+        api_token: 'token',
+        project: 12345,
+        endpoint: 'https://example.gitlab.com'
+      }
+    end
+
+    subject { described_class.new(options).execute }
+
+    it 'requests commit_merge_requests from the gitlab client' do
+      expected_result = ['results']
+      client = double('Gitlab::Client', commit_merge_requests: expected_result) # rubocop:disable RSpec/VerifiedDoubles
+
+      expect(Gitlab).to receive(:client)
+        .with(endpoint: options[:endpoint], private_token: options[:api_token])
+        .and_return(client)
+
+      expect(subject).to eq(expected_result)
+    end
+  end
+end
diff --git a/spec/scripts/create_pipeline_failure_incident_spec.rb b/spec/scripts/create_pipeline_failure_incident_spec.rb
new file mode 100644
index 0000000000000..8549cec1b12c5
--- /dev/null
+++ b/spec/scripts/create_pipeline_failure_incident_spec.rb
@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+require_relative '../../scripts/create-pipeline-failure-incident'
+require_relative '../support/helpers/stub_env'
+
+RSpec.describe CreatePipelineFailureIncident, feature_category: :tooling do
+  include StubENV
+
+  describe '#execute' do
+    let(:create_issue) { instance_double(CreateIssue) }
+    let(:issue) { double('Issue', iid: 1) } # rubocop:disable RSpec/VerifiedDoubles
+    let(:create_issue_discussion) { instance_double(CreateIssueDiscussion, execute: true) }
+    let(:failed_jobs) { instance_double(PipelineFailedJobs, execute: []) }
+
+    let(:options) do
+      {
+        project: 1234,
+        api_token: 'asdf1234'
+      }
+    end
+
+    let(:issue_params) do
+      {
+        issue_type: 'incident',
+        title: title,
+        description: description,
+        labels: incident_labels
+      }
+    end
+
+    subject { described_class.new(options).execute }
+
+    before do
+      stub_env(
+        'CI_COMMIT_SHA' => 'bfcd2b9b5cad0b889494ce830697392c8ca11257',
+        'CI_PROJECT_PATH' => 'gitlab.com/gitlab-org/gitlab',
+        'CI_PROJECT_NAME' => 'gitlab',
+        'GITLAB_USER_ID' => '1111',
+        'CI_PROJECT_ID' => '13083',
+        'CI_PIPELINE_ID' => '1234567',
+        'CI_PIPELINE_URL' => 'https://gitlab.com/gitlab-org/gitlab/-/pipelines/1234567',
+        'CI_PROJECT_URL' => 'https://gitlab.com/gitlab-org/gitlab',
+        'CI_PIPELINE_CREATED_AT' => '2023-01-24 00:00:00',
+        'CI_COMMIT_TITLE' => 'Commit title',
+        'CI_PIPELINE_SOURCE' => 'push',
+        'GITLAB_USER_NAME' => 'Foo User',
+        'PROJECT_TOKEN_FOR_CI_SCRIPTS_API_USAGE' => 'asdf1234',
+        'CI_SERVER_URL' => 'https://gitlab.com',
+        'GITLAB_USER_LOGIN' => 'foo'
+      )
+    end
+
+    shared_examples 'creating an issue' do
+      it 'successfully creates an issue' do
+        allow(PipelineFailedJobs).to receive(:new)
+          .with(API::DEFAULT_OPTIONS.merge(exclude_allowed_to_fail_jobs: true))
+          .and_return(failed_jobs)
+
+        expect(CreateIssue).to receive(:new)
+          .with(project: options[:project], api_token: options[:api_token])
+          .and_return(create_issue)
+
+        expect(CreateIssueDiscussion).to receive(:new)
+          .with(project: options[:project], api_token: options[:api_token])
+          .and_return(create_issue_discussion).twice
+
+        expect(create_issue).to receive(:execute)
+          .with(issue_params).and_return(issue)
+
+        expect(subject).to eq(issue)
+      end
+    end
+
+    context 'when stable branch' do
+      let(:incident_labels) { ['release-blocker'] }
+      let(:title) { /broken `15-6-stable-ee`/ }
+      let(:description) { /A broken stable branch prevents patch releases/ }
+
+      let(:commit_merge_request) do
+        {
+          'author' => {
+            'id' => '2'
+          },
+          'title' => 'foo',
+          'web_url' => 'https://gitlab.com/test'
+        }
+      end
+
+      let(:merge_request) { instance_double(CommitMergeRequests, execute: [commit_merge_request]) }
+      let(:issue_params) { super().merge(assignee_ids: [1111, 2]) }
+
+      before do
+        stub_env(
+          'CI_COMMIT_REF_NAME' => '15-6-stable-ee'
+        )
+
+        allow(CommitMergeRequests).to receive(:new)
+          .with(API::DEFAULT_OPTIONS.merge(sha: ENV['CI_COMMIT_SHA']))
+          .and_return(merge_request)
+      end
+
+      it_behaves_like 'creating an issue'
+    end
+
+    context 'when other branch' do
+      let(:incident_labels) { ['Engineering Productivity', 'master-broken::undetermined', 'master:broken'] }
+      let(:title) { /broken `master`/ }
+      let(:description) { /Follow the \[Broken `master` handbook guide\]/ }
+
+      before do
+        stub_env(
+          'CI_COMMIT_REF_NAME' => 'master'
+        )
+      end
+
+      it_behaves_like 'creating an issue'
+    end
+  end
+end
-- 
GitLab