From a4341683d6a6e7cdf6692357e96af392dd9ab1e5 Mon Sep 17 00:00:00 2001
From: Lin Jen-Shin <jen-shin@gitlab.com>
Date: Wed, 27 Apr 2022 00:49:09 +0800
Subject: [PATCH] Add a script to generate the auth section for CODEOWNERS

---
 .../lib/tooling/find_codeowners_spec.rb       | 158 ++++++++++++++++++
 tooling/bin/find_codeowners                   |   6 +
 tooling/config/CODEOWNERS.yml                 |  30 ++++
 tooling/lib/tooling/find_codeowners.rb        | 100 +++++++++++
 4 files changed, 294 insertions(+)
 create mode 100644 spec/tooling/lib/tooling/find_codeowners_spec.rb
 create mode 100755 tooling/bin/find_codeowners
 create mode 100644 tooling/config/CODEOWNERS.yml
 create mode 100644 tooling/lib/tooling/find_codeowners.rb

diff --git a/spec/tooling/lib/tooling/find_codeowners_spec.rb b/spec/tooling/lib/tooling/find_codeowners_spec.rb
new file mode 100644
index 000000000000..8172992dc8c2
--- /dev/null
+++ b/spec/tooling/lib/tooling/find_codeowners_spec.rb
@@ -0,0 +1,158 @@
+# frozen_string_literal: true
+
+require_relative '../../../../tooling/lib/tooling/find_codeowners'
+
+RSpec.describe Tooling::FindCodeowners do
+  describe '#run' do
+    before do
+      allow(described_class).to receive(:git_ls_files).and_return(<<~LINES)
+        dir/0/0/0
+        dir/0/0/2
+        dir/0/1
+        dir/1
+        dir/2
+      LINES
+
+      find_results = {
+        'dir/0/0' => "dir/0/0\ndir/0/0/0\ndir/0/0/2\n",
+        'dir/0' => "dir/0\ndir/0/0/0\ndir/0/0/2\ndir/0/1\n",
+        'dir' => "dir\ndir/0/0/0\ndir/0/0/2\ndir/0/1\ndir/1\ndir/2\n"
+      }
+
+      allow(described_class).to receive(:find_dir_maxdepth_1) do |dir|
+        find_results[dir]
+      end
+
+      allow(described_class).to receive(:load_config).and_return(
+        '[Section name]': {
+          '@group': {
+            allow: {
+              keywords: ['dir'],
+              patterns: ['/%{keyword}/**/*']
+            },
+            deny: {
+              keywords: ['1'],
+              patterns: ['**/%{keyword}']
+            }
+          }
+        }
+      )
+    end
+
+    it 'prints CODEOWNERS as configured' do
+      expect { described_class.run }.to output(<<~CODEOWNERS).to_stdout
+        [Section name]
+        /dir/0/0 @group
+        /dir/2 @group
+      CODEOWNERS
+    end
+  end
+
+  describe '#load_definitions' do
+    it 'expands the allow and deny list with keywords and patterns' do
+      described_class.load_definitions.each do |section, group_defintions|
+        group_defintions.each do |group, definitions|
+          expect(definitions[:allow]).to be_an(Array)
+          expect(definitions[:deny]).to be_an(Array)
+        end
+      end
+    end
+
+    it 'expands the auth group' do
+      auth = described_class.load_definitions.dig(
+        :'[Authentication and Authorization]',
+        :'@gitlab-org/manage/authentication-and-authorization')
+
+      expect(auth).to eq(
+        allow: %w[
+          /{,ee/}app/**/*password*{/**/*,}
+          /{,ee/}config/**/*password*{/**/*,}
+          /{,ee/}lib/**/*password*{/**/*,}
+          /{,ee/}app/**/*auth*{/**/*,}
+          /{,ee/}config/**/*auth*{/**/*,}
+          /{,ee/}lib/**/*auth*{/**/*,}
+          /{,ee/}app/**/*token*{/**/*,}
+          /{,ee/}config/**/*token*{/**/*,}
+          /{,ee/}lib/**/*token*{/**/*,}
+        ],
+        deny: %w[
+          **/*author.*{/**/*,}
+          **/*author_*{/**/*,}
+          **/*authored*{/**/*,}
+          **/*authoring*{/**/*,}
+          **/*.png*{/**/*,}
+          **/*.svg*{/**/*,}
+          **/*deploy_token*{/**/*,}
+          **/*runner{,s}_token*{/**/*,}
+          **/*job_token*{/**/*,}
+          **/*tokenizer*{/**/*,}
+          **/*filtered_search*{/**/*,}
+        ]
+      )
+    end
+  end
+
+  describe '#load_config' do
+    it 'loads the config with symbolized keys' do
+      config = described_class.load_config
+
+      expect_hash_keys_to_be_symbols(config)
+    end
+
+    def expect_hash_keys_to_be_symbols(object)
+      if object.is_a?(Hash)
+        object.each do |key, value|
+          expect(key).to be_a(Symbol)
+
+          expect_hash_keys_to_be_symbols(value)
+        end
+      end
+    end
+  end
+
+  describe '#path_matches?' do
+    let(:pattern) { 'pattern' }
+    let(:path) { 'path' }
+
+    it 'passes flags we are expecting to File.fnmatch?' do
+      expected_flags =
+        ::File::FNM_DOTMATCH | ::File::FNM_PATHNAME | ::File::FNM_EXTGLOB
+
+      expect(File).to receive(:fnmatch?).with(pattern, path, expected_flags)
+
+      described_class.path_matches?(pattern, path)
+    end
+  end
+
+  describe '#consolidate_paths' do
+    before do
+      allow(described_class).to receive(:find_dir_maxdepth_1).and_return(<<~LINES)
+        dir
+        dir/0
+        dir/2
+        dir/3
+        dir/1
+      LINES
+    end
+
+    context 'when the directory has the same number of entries' do
+      let(:input_paths) { %W[dir/0\n dir/1\n dir/2\n dir/3\n] }
+
+      it 'consolidates into the directory' do
+        paths = described_class.consolidate_paths(input_paths)
+
+        expect(paths).to eq(["dir\n"])
+      end
+    end
+
+    context 'when the directory has different number of entries' do
+      let(:input_paths) { %W[dir/0\n dir/1\n dir/2\n] }
+
+      it 'returns the original paths' do
+        paths = described_class.consolidate_paths(input_paths)
+
+        expect(paths).to eq(input_paths)
+      end
+    end
+  end
+end
diff --git a/tooling/bin/find_codeowners b/tooling/bin/find_codeowners
new file mode 100755
index 000000000000..6cf1bab53aa9
--- /dev/null
+++ b/tooling/bin/find_codeowners
@@ -0,0 +1,6 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+require_relative '../lib/tooling/find_codeowners'
+
+Tooling::FindCodeowners.run # prints the generated CODEOWNERS sections to stdout
diff --git a/tooling/config/CODEOWNERS.yml b/tooling/config/CODEOWNERS.yml
new file mode 100644
index 000000000000..9ef22c267d56
--- /dev/null
+++ b/tooling/config/CODEOWNERS.yml
@@ -0,0 +1,30 @@
+# This file is read by the following script, which takes no arguments:
+#     tooling/bin/find_codeowners
+# Paste the output of the script into .gitlab/CODEOWNERS
+
+'[Authentication and Authorization]': # section name, printed as-is before its entries
+  '@gitlab-org/manage/authentication-and-authorization': # group appended to every printed path
+    allow:
+      keywords: # each keyword is substituted for %{keyword} in every pattern of this list
+        - password
+        - auth
+        - token
+      patterns:
+        - '/{,ee/}app/**/*%{keyword}*{/**/*,}'
+        - '/{,ee/}config/**/*%{keyword}*{/**/*,}'
+        - '/{,ee/}lib/**/*%{keyword}*{/**/*,}'
+    deny: # a path matching any expanded deny pattern is dropped even if an allow pattern matches
+      keywords:
+        - author.
+        - author_
+        - authored
+        - authoring
+        - .png
+        - .svg
+        - deploy_token
+        - runner{,s}_token # brace alternation: covers runner_token and runners_token
+        - job_token
+        - tokenizer
+        - filtered_search
+      patterns:
+        - '**/*%{keyword}*{/**/*,}'
diff --git a/tooling/lib/tooling/find_codeowners.rb b/tooling/lib/tooling/find_codeowners.rb
new file mode 100644
index 000000000000..a7d00625e9aa
--- /dev/null
+++ b/tooling/lib/tooling/find_codeowners.rb
@@ -0,0 +1,100 @@
+# frozen_string_literal: true
+
+require 'yaml'
+
+module Tooling
+  module FindCodeowners
+    module_function
+
+    def run
+      ls_files = git_ls_files
+
+      load_definitions.each do |section, group_defintions|
+        puts section
+
+        group_defintions.each do |group, allow:, deny:|
+          matched_files = ls_files.each_line.select do |line|
+            allow.find do |pattern|
+              path = "/#{line.chomp}"
+
+              path_matches?(pattern, path) &&
+                deny.none? { |pattern| path_matches?(pattern, path) }
+            end
+          end
+
+          consolidated = consolidate_paths(matched_files)
+          consolidated_again = consolidate_paths(consolidated)
+
+          while consolidated_again.size < consolidated.size
+            consolidated = consolidated_again
+            consolidated_again = consolidate_paths(consolidated)
+          end
+
+          consolidated.each do |file|
+            puts "/#{file.chomp} #{group}"
+          end
+        end
+      end
+    end
+
+    def load_definitions
+      result = load_config
+
+      result.each do |section, group_defintions|
+        group_defintions.each do |group, definitions|
+          definitions.transform_values! do |keywords:, patterns:|
+            keywords.flat_map do |keyword|
+              patterns.map do |pattern|
+                pattern % { keyword: keyword }
+              end
+            end
+          end
+        end
+      end
+
+      result
+    end
+
+    def load_config
+      config_path = "#{__dir__}/../../config/CODEOWNERS.yml"
+
+      if YAML.respond_to?(:safe_load_file) # Ruby 3.0+
+        YAML.safe_load_file(config_path, symbolize_names: true)
+      else
+        YAML.safe_load(File.read(config_path), symbolize_names: true)
+      end
+    end
+
+    # Copied and modified from ee/lib/gitlab/code_owners/file.rb
+    def path_matches?(pattern, path)
+      # `FNM_DOTMATCH` makes sure we also match files starting with a `.`
+      # `FNM_PATHNAME` makes sure ** matches path separators
+      flags = ::File::FNM_DOTMATCH | ::File::FNM_PATHNAME
+
+      # BEGIN extension
+      flags |= ::File::FNM_EXTGLOB
+      # END extension
+
+      ::File.fnmatch?(pattern, path, flags)
+    end
+
+    def consolidate_paths(matched_files)
+      matched_files.group_by(&File.method(:dirname)).flat_map do |dir, files|
+        # First line is the dir itself
+        if find_dir_maxdepth_1(dir).lines.drop(1).sort == files.sort
+          "#{dir}\n"
+        else
+          files
+        end
+      end.sort
+    end
+
+    def git_ls_files
+      `git ls-files`
+    end
+
+    def find_dir_maxdepth_1(dir)
+      `find #{dir} -maxdepth 1`
+    end
+  end
+end
-- 
GitLab