From fdba060146a5952726d02d036eca8581ba6b022e Mon Sep 17 00:00:00 2001
From: Andrew Evans <aevans@gitlab.com>
Date: Mon, 26 Feb 2024 15:10:22 +0000
Subject: [PATCH] Additional cleanup for external usernames on oAuth and LDAP
 sign up

Currently if a user joins a GitLab instance via oAuth, LDAP, or SCIM and
their username begins with multiple `--` characters, these characters
are removed to ensure a valid username can be created for the new user.
Illegal characters such as `*&^()` are removed, and illegal extensions
such as `.git` and `.atom` are also removed.

This change extends the behavior to include all leading legal
characters: `-` , `_` and `.` . It also trims from the end of the
potential username, and removes consecutive legal characters so the
resulting username will pass the check for
`Gitlab::Regex.oci_repository_path_regex` .

Changelog: changed
---
 .../extra_slug_path_sanitization.yml          |  9 ++++++
 .../gitlab/scim/base_provisioning_service.rb  |  9 ++++--
 .../scim/group/provisioning_service_spec.rb   | 30 +++++++++++++++++++
 .../gitlab/scim/provisioning_service_spec.rb  | 30 +++++++++++++++++++
 .../auth/external_username_sanitizer.rb       | 27 +++++++++++++++++
 lib/gitlab/auth/o_auth/user.rb                | 12 ++++++--
 .../auth/external_username_sanitizer_spec.rb  | 29 ++++++++++++++++++
 spec/lib/gitlab/auth/o_auth/user_spec.rb      | 25 ++++++++++++++++
 8 files changed, 166 insertions(+), 5 deletions(-)
 create mode 100644 config/feature_flags/gitlab_com_derisk/extra_slug_path_sanitization.yml
 create mode 100644 lib/gitlab/auth/external_username_sanitizer.rb
 create mode 100644 spec/lib/gitlab/auth/external_username_sanitizer_spec.rb

diff --git a/config/feature_flags/gitlab_com_derisk/extra_slug_path_sanitization.yml b/config/feature_flags/gitlab_com_derisk/extra_slug_path_sanitization.yml
new file mode 100644
index 000000000000..d49227fe8f33
--- /dev/null
+++ b/config/feature_flags/gitlab_com_derisk/extra_slug_path_sanitization.yml
@@ -0,0 +1,9 @@
+---
+name: extra_slug_path_sanitization
+feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/439623
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/145038
+rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/442650
+milestone: '16.10'
+group: group::authentication
+type: gitlab_com_derisk
+default_enabled: false
diff --git a/ee/lib/ee/gitlab/scim/base_provisioning_service.rb b/ee/lib/ee/gitlab/scim/base_provisioning_service.rb
index d03e7af9a0e2..c94313e75d41 100644
--- a/ee/lib/ee/gitlab/scim/base_provisioning_service.rb
+++ b/ee/lib/ee/gitlab/scim/base_provisioning_service.rb
@@ -33,9 +33,12 @@ def random_password
         end
 
         def valid_username
-          clean_username = ::Namespace.clean_path(@parsed_hash[:username])
-
-          ::Gitlab::Utils::Uniquify.new.string(clean_username) { |s| !NamespacePathValidator.valid_path?(s) }
+          if ::Feature.enabled?(:extra_slug_path_sanitization)
+            ::Gitlab::Auth::ExternalUsernameSanitizer.new(@parsed_hash[:username]).sanitize
+          else
+            clean_username = ::Namespace.clean_path(@parsed_hash[:username])
+            ::Gitlab::Utils::Uniquify.new.string(clean_username) { |s| !NamespacePathValidator.valid_path?(s) }
+          end
         end
 
         def missing_params
diff --git a/ee/spec/lib/ee/gitlab/scim/group/provisioning_service_spec.rb b/ee/spec/lib/ee/gitlab/scim/group/provisioning_service_spec.rb
index cf6eaf89ca39..5810a8c4109a 100644
--- a/ee/spec/lib/ee/gitlab/scim/group/provisioning_service_spec.rb
+++ b/ee/spec/lib/ee/gitlab/scim/group/provisioning_service_spec.rb
@@ -143,6 +143,36 @@ def user
           expect(user).to be_confirmed
         end
       end
+
+      context 'when username contains special characters' do
+        subject(:result) { service.execute }
+
+        let_it_be(:service_params) do
+          {
+            email: 'work@example.com',
+            name: 'Test Name',
+            extern_uid: 'test_uid',
+            username: ' --ricky.^#!__the._raccoon--'
+          }
+        end
+
+        it 'sanitizes more special characters from the username' do
+          expect { result }.to change { User.count }.by(1)
+          expect(user.username).to eq('ricky.the.raccoon')
+        end
+
+        context 'and extra_slug_sanitization FF is disabled' do
+          before do
+            stub_feature_flags(extra_slug_path_sanitization: false)
+          end
+
+          it 'cannot create a user due to invalid username' do
+            expect { result }.not_to change { User.count }
+            expect(result.status).to eq(:error)
+            expect(result.message).to include(Gitlab::Regex.oci_repository_path_regex_message)
+          end
+        end
+      end
     end
 
     context 'when a provisioning error occurs' do
diff --git a/ee/spec/lib/ee/gitlab/scim/provisioning_service_spec.rb b/ee/spec/lib/ee/gitlab/scim/provisioning_service_spec.rb
index 561175775c5c..a5ce4092fab5 100644
--- a/ee/spec/lib/ee/gitlab/scim/provisioning_service_spec.rb
+++ b/ee/spec/lib/ee/gitlab/scim/provisioning_service_spec.rb
@@ -92,6 +92,36 @@ def user
           expect { service.execute }.to change { User.count }.by(1)
         end
       end
+
+      context 'when username contains invalid characters' do
+        subject(:result) { service.execute }
+
+        let_it_be(:service_params) do
+          {
+            email: 'work@example.com',
+            name: 'Test Name',
+            extern_uid: 'test_uid',
+            username: ' --ricky.^#!__the._raccoon--'
+          }
+        end
+
+        it 'sanitizes more special characters from the username' do
+          expect { result }.to change { User.count }.by(1)
+          expect(user.username).to eq('ricky.the.raccoon')
+        end
+
+        context 'and extra_slug_sanitization FF is disabled' do
+          before do
+            stub_feature_flags(extra_slug_path_sanitization: false)
+          end
+
+          it 'cannot create user due to invalid username' do
+            expect { result }.not_to change { User.count }
+            expect(result.status).to eq(:error)
+            expect(result.message).to include(Gitlab::Regex.oci_repository_path_regex_message)
+          end
+        end
+      end
     end
 
     context 'when invalid params' do
diff --git a/lib/gitlab/auth/external_username_sanitizer.rb b/lib/gitlab/auth/external_username_sanitizer.rb
new file mode 100644
index 000000000000..d8b64783fc63
--- /dev/null
+++ b/lib/gitlab/auth/external_username_sanitizer.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Auth
+    class ExternalUsernameSanitizer
+      attr_reader :external_username
+
+      def initialize(external_username)
+        @external_username = external_username
+      end
+
+      def sanitize
+        # remove most characters illegal in usernames / slugs
+        valid_username = ::Namespace.clean_path(external_username)
+        # remove leading - , _ , or . characters not removed by Namespace.clean_path
+        valid_username = valid_username.sub(/\A[_.-]+/, '')
+        # remove trailing - , _ or . characters not removed by Namespace.clean_path
+        valid_username = valid_username.sub(/[_.-]+\z/, '')
+        # remove consecutive - , _ , or . characters
+        valid_username = valid_username.gsub(/([_.-])[_.-]+/, '\1')
+        Gitlab::Utils::Uniquify.new.string(valid_username) do |s|
+          !NamespacePathValidator.valid_path?(s)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/auth/o_auth/user.rb b/lib/gitlab/auth/o_auth/user.rb
index d70c788dac88..4dfa8ee0a415 100644
--- a/lib/gitlab/auth/o_auth/user.rb
+++ b/lib/gitlab/auth/o_auth/user.rb
@@ -232,8 +232,7 @@ def user_attributes
           name ||= auth_hash.name
           email ||= auth_hash.email
 
-          valid_username = ::Namespace.clean_path(username)
-          valid_username = Gitlab::Utils::Uniquify.new.string(valid_username) { |s| !NamespacePathValidator.valid_path?(s) }
+          valid_username = sanitize_username(username)
 
           {
             name: name.strip.presence || valid_username,
@@ -245,6 +244,15 @@ def user_attributes
           }
         end
 
+        def sanitize_username(username)
+          if Feature.enabled?(:extra_slug_path_sanitization)
+            ExternalUsernameSanitizer.new(username).sanitize
+          else
+            valid_username = ::Namespace.clean_path(username)
+            Gitlab::Utils::Uniquify.new.string(valid_username) { |s| !NamespacePathValidator.valid_path?(s) }
+          end
+        end
+
         def sync_profile_from_provider?
           Gitlab::Auth::OAuth::Provider.sync_profile_from_provider?(auth_hash.provider)
         end
diff --git a/spec/lib/gitlab/auth/external_username_sanitizer_spec.rb b/spec/lib/gitlab/auth/external_username_sanitizer_spec.rb
new file mode 100644
index 000000000000..21837f84e902
--- /dev/null
+++ b/spec/lib/gitlab/auth/external_username_sanitizer_spec.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Auth::ExternalUsernameSanitizer, feature_category: :system_access do
+  subject(:sanitized_name) { described_class.new(external_username).sanitize }
+
+  describe '#sanitize' do
+    using RSpec::Parameterized::TableSyntax
+
+    where(:external_username, :output) do
+      'alice'                         | 'alice'
+      'admin'                         | 'admin1'
+      'testy.git'                     | 'testy'
+      '___carly_the_capybara'         | 'carly_the_capybara'
+      'shingo___the...shiba---inu'    | 'shingo_the.shiba-inu'
+      'francis-the-ferret-'           | 'francis-the-ferret'
+      '___opie.-_!the$_#^^opossum---' | 'opie.the_opossum'
+      ' --ricky.^#!__the._raccoon--'  | 'ricky.the.raccoon'
+      '*&$amy_the_armadillo'          | 'amy_the_armadillo'
+      'bobby-the-badger$!()'          | 'bobby-the-badger'
+      'denise^&*the!dhole'            | 'denisethedhole'
+    end
+
+    with_them do
+      it { is_expected.to eq(output) }
+    end
+  end
+end
diff --git a/spec/lib/gitlab/auth/o_auth/user_spec.rb b/spec/lib/gitlab/auth/o_auth/user_spec.rb
index c137ca88589e..423fbfaf5c9c 100644
--- a/spec/lib/gitlab/auth/o_auth/user_spec.rb
+++ b/spec/lib/gitlab/auth/o_auth/user_spec.rb
@@ -922,6 +922,31 @@ def result_identities(dn, uid)
         expect(gl_user.username).to eq('admin1')
       end
     end
+
+    context 'with leading or trailing _.- characters in username' do
+      let(:info_hash) do
+        {
+          nickname: '___opie.-_!the$_#^^opossum---',
+          email: 'admin@othermail.com'
+        }
+      end
+
+      it 'creates valid user with sanitized username' do
+        expect(gl_user).to be_valid
+        expect(gl_user.username).to eq('opie.the_opossum')
+      end
+
+      context 'and extra_slug_path_sanitization feature is disabled' do
+        before do
+          stub_feature_flags(extra_slug_path_sanitization: false)
+        end
+
+        it 'fails to create user' do
+          expect(gl_user).not_to be_valid
+          expect(gl_user.errors[:username]).to be_present
+        end
+      end
+    end
   end
 
   describe 'updating email with sync profile' do
-- 
GitLab