Skip to content
代码片段 群组 项目
未验证 提交 41d942c9 编辑于 作者: Rémy Coutable's avatar Rémy Coutable
浏览文件

Generalize the RSpec pipeline generation script


Signed-off-by: Rémy Coutable <remy@rymai.me>
上级 b167ad37
No related branches found
No related tags found
无相关合并请求
...@@ -159,7 +159,7 @@ variables: ...@@ -159,7 +159,7 @@ variables:
JUNIT_RETRY_FILE: rspec/junit_rspec-retry.xml JUNIT_RETRY_FILE: rspec/junit_rspec-retry.xml
KNAPSACK_RSPEC_SUITE_REPORT_PATH: knapsack/report-master.json KNAPSACK_RSPEC_SUITE_REPORT_PATH: knapsack/report-master.json
RSPEC_CHANGED_FILES_PATH: rspec/changed_files.txt RSPEC_CHANGED_FILES_PATH: rspec/changed_files.txt
RSPEC_FOSS_IMPACT_PIPELINE_YML: rspec-foss-impact-pipeline.yml RSPEC_FOSS_IMPACT_PIPELINE_TEMPLATE_YML: .gitlab/ci/rails/rspec-foss-impact.gitlab-ci.yml.erb
RSPEC_LAST_RUN_RESULTS_FILE: rspec/rspec_last_run_results.txt RSPEC_LAST_RUN_RESULTS_FILE: rspec/rspec_last_run_results.txt
RSPEC_MATCHING_JS_FILES_PATH: rspec/js_matching_files.txt RSPEC_MATCHING_JS_FILES_PATH: rspec/js_matching_files.txt
RSPEC_MATCHING_TESTS_PATH: rspec/matching_tests.txt RSPEC_MATCHING_TESTS_PATH: rspec/matching_tests.txt
......
...@@ -821,13 +821,14 @@ rspec-foss-impact:pipeline-generate: ...@@ -821,13 +821,14 @@ rspec-foss-impact:pipeline-generate:
extends: extends:
- .rails:rules:rspec-foss-impact - .rails:rules:rspec-foss-impact
stage: prepare stage: prepare
needs: ["detect-tests"] needs: ["detect-tests", "retrieve-tests-metadata"]
script: script:
- scripts/generate-rspec-foss-impact-pipeline "${RSPEC_MATCHING_TESTS_FOSS_PATH}" "${RSPEC_FOSS_IMPACT_PIPELINE_YML}" - scripts/generate_rspec_pipeline.rb -f "${RSPEC_MATCHING_TESTS_FOSS_PATH}" -t "${RSPEC_FOSS_IMPACT_PIPELINE_TEMPLATE_YML}" -k "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
- cat "${RSPEC_FOSS_IMPACT_PIPELINE_TEMPLATE_YML}.yml"
artifacts: artifacts:
expire_in: 1 day expire_in: 1 day
paths: paths:
- $RSPEC_FOSS_IMPACT_PIPELINE_YML - "${RSPEC_FOSS_IMPACT_PIPELINE_TEMPLATE_YML}.yml"
rspec-foss-impact:trigger: rspec-foss-impact:trigger:
extends: extends:
...@@ -850,7 +851,7 @@ rspec-foss-impact:trigger: ...@@ -850,7 +851,7 @@ rspec-foss-impact:trigger:
yaml_variables: true yaml_variables: true
pipeline_variables: true pipeline_variables: true
include: include:
- artifact: $RSPEC_FOSS_IMPACT_PIPELINE_YML - artifact: "${RSPEC_FOSS_IMPACT_PIPELINE_TEMPLATE_YML}.yml"
job: rspec-foss-impact:pipeline-generate job: rspec-foss-impact:pipeline-generate
fail-pipeline-early: fail-pipeline-early:
......
...@@ -21,7 +21,7 @@ dont-interrupt-me: ...@@ -21,7 +21,7 @@ dont-interrupt-me:
script: script:
- echo "This jobs makes sure this pipeline won't be interrupted! See https://docs.gitlab.com/ee/ci/yaml/#interruptible." - echo "This jobs makes sure this pipeline won't be interrupted! See https://docs.gitlab.com/ee/ci/yaml/#interruptible."
rspec foss-impact: .base-rspec-foss-impact:
extends: .rspec-base-pg12-as-if-foss extends: .rspec-base-pg12-as-if-foss
needs: needs:
- pipeline: $PARENT_PIPELINE_ID - pipeline: $PARENT_PIPELINE_ID
...@@ -37,9 +37,6 @@ rspec foss-impact: ...@@ -37,9 +37,6 @@ rspec foss-impact:
variables: variables:
RSPEC_TESTS_FILTER_FILE: "${RSPEC_MATCHING_TESTS_FOSS_PATH}" RSPEC_TESTS_FILTER_FILE: "${RSPEC_MATCHING_TESTS_FOSS_PATH}"
RSPEC_TESTS_MAPPING_ENABLED: "true" RSPEC_TESTS_MAPPING_ENABLED: "true"
<% if Integer(parallel_value) > 1 %>
parallel: <%= parallel_value %>
<% end %>
script: script:
- !reference [.base-script, script] - !reference [.base-script, script]
- rspec_paralellized_job "--tag ~quarantine --tag ~level:migration --tag ~zoekt" - rspec_paralellized_job "--tag ~quarantine --tag ~level:migration --tag ~zoekt"
...@@ -48,3 +45,46 @@ rspec foss-impact: ...@@ -48,3 +45,46 @@ rspec foss-impact:
paths: paths:
- "${RSPEC_MATCHING_TESTS_FOSS_PATH}" - "${RSPEC_MATCHING_TESTS_FOSS_PATH}"
- tmp/capybara/ - tmp/capybara/
<% if rspec_files_per_test_level[:migration][:files].size > 0 %>
rspec migration foss-impact:
extends: .base-rspec-foss-impact
<% if rspec_files_per_test_level[:migration][:parallelization] > 1 %>
parallel: <%= rspec_files_per_test_level[:migration][:parallelization] %>
<% end %>
script:
- !reference [.base-script, script]
- rspec_paralellized_job "--tag ~quarantine --tag ~zoekt"
<% end %>
<% if rspec_files_per_test_level[:background_migration][:files].size > 0 %>
rspec background_migration foss-impact:
extends: .base-rspec-foss-impact
<% if rspec_files_per_test_level[:background_migration][:parallelization] > 1 %>
parallel: <%= rspec_files_per_test_level[:background_migration][:parallelization] %>
<% end %>
<% end %>
<% if rspec_files_per_test_level[:unit][:files].size > 0 %>
rspec unit foss-impact:
extends: .base-rspec-foss-impact
<% if rspec_files_per_test_level[:unit][:parallelization] > 1 %>
parallel: <%= rspec_files_per_test_level[:unit][:parallelization] %>
<% end %>
<% end %>
<% if rspec_files_per_test_level[:integration][:files].size > 0 %>
rspec integration foss-impact:
extends: .base-rspec-foss-impact
<% if rspec_files_per_test_level[:integration][:parallelization] > 1 %>
parallel: <%= rspec_files_per_test_level[:integration][:parallelization] %>
<% end %>
<% end %>
<% if rspec_files_per_test_level[:system][:files].size > 0 %>
rspec system foss-impact:
extends: .base-rspec-foss-impact
<% if rspec_files_per_test_level[:system][:parallelization] > 1 %>
parallel: <%= rspec_files_per_test_level[:system][:parallelization] %>
<% end %>
<% end %>
#!/usr/bin/env bash

set -euo pipefail

# Script to generate `rspec foss-impact` test child pipeline with dynamically parallelized jobs.
#
# Usage: generate-rspec-foss-impact-pipeline <rspec_matching_tests_foss_path> <pipeline_yml>
#
#   $1: file containing the whitespace-separated list of RSpec files to run.
#   $2: path the generated pipeline YAML is written to.
#
# `echoinfo` and `echosuccess` come from the sourced scripts/utils.sh.

source scripts/utils.sh

rspec_matching_tests_foss_path="${1}"
pipeline_yml="${2}"

# Each whitespace-separated word of the input file is counted as one test file.
test_file_count=$(wc -w "${rspec_matching_tests_foss_path}" | awk '{ print $1 }')
echoinfo "test_file_count: ${test_file_count}"

# Nothing to run: publish the skip pipeline instead of a generated one.
if [[ "${test_file_count}" -eq 0 ]]; then
  skip_pipeline=".gitlab/ci/_skip.yml"

  echo "Using ${skip_pipeline} due to no impacted FOSS rspec tests to run"
  cp "${skip_pipeline}" "${pipeline_yml}"
  exit
fi

# As of 2022-09-01:
# $ find spec -type f | wc -l
#  12825
# and
# $ find ee/spec -type f | wc -l
#  5610
# which gives a total of 18435 test files (`number_of_tests_in_total_in_the_test_suite`).
#
# Total time to run all tests (based on https://gitlab-org.gitlab.io/rspec_profiling_stats/) is 170183 seconds (`duration_of_the_test_suite_in_seconds`).
#
# This gives an approximate 170183 / 18435 = 9.2 seconds per test file (`average_test_file_duration_in_seconds`).
#
# If we want each test job to finish in 10 minutes, given we have 3 minutes of setup (`setup_duration_in_seconds`), then we need to give 7 minutes of testing to each test node (`optimal_test_runtime_duration_in_seconds`).
# (7 * 60) / 9.2 = 45.6
#
# So if we'd want to run the full test suites in 10 minutes (`optimal_test_job_duration_in_seconds`), we'd need to run at max 45 test file per nodes (`optimal_test_file_count_per_node`).
#
# NOTE: the arithmetic below is integer-only, so the values actually computed are
# 170183 / 18435 = 9 seconds per test file and 420 / 9 = 46 test files per node.
number_of_tests_in_total_in_the_test_suite=18435
duration_of_the_test_suite_in_seconds=170183
optimal_test_job_duration_in_seconds=600 # 10 minutes
setup_duration_in_seconds=180 # 3 minutes

optimal_test_runtime_duration_in_seconds=$(( optimal_test_job_duration_in_seconds - setup_duration_in_seconds ))
echoinfo "optimal_test_runtime_duration_in_seconds: ${optimal_test_runtime_duration_in_seconds}"

average_test_file_duration_in_seconds=$(( duration_of_the_test_suite_in_seconds / number_of_tests_in_total_in_the_test_suite ))
echoinfo "average_test_file_duration_in_seconds: ${average_test_file_duration_in_seconds}"

optimal_test_file_count_per_node=$(( optimal_test_runtime_duration_in_seconds / average_test_file_duration_in_seconds ))
echoinfo "optimal_test_file_count_per_node: ${optimal_test_file_count_per_node}"

# Integer division rounds down, so fewer files than one node's worth yields a node count of 0.
node_count=$(( test_file_count / optimal_test_file_count_per_node ))
echoinfo "node_count: ${node_count}"

echoinfo "Optimal node count for 'rspec foss-impact' jobs is ${node_count}."

MAX_NODES_COUNT=50 # Maximum parallelization allowed by GitLab
if [[ "${node_count}" -gt "${MAX_NODES_COUNT}" ]]; then
  echoinfo "We don't want to parallelize 'rspec foss-impact' to more than ${MAX_NODES_COUNT} jobs for now! Decreasing the parallelization to ${MAX_NODES_COUNT}."
  node_count=${MAX_NODES_COUNT}
fi

# Render the ERB template, exposing the node count to it as `parallel_value`.
ruby -rerb -e "puts ERB.new(File.read('.gitlab/ci/rails/rspec-foss-impact.gitlab-ci.yml.erb')).result_with_hash(parallel_value: ${node_count})" > "${pipeline_yml}"

echosuccess "Generated ${pipeline_yml} pipeline with following content:"
cat "${pipeline_yml}"
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'optparse'
require 'json'
require 'fileutils'
require 'erb'
require_relative '../tooling/quality/test_level'
# Class to generate RSpec test child pipeline with dynamically parallelized jobs.
# Generates an RSpec child pipeline configuration from an ERB template, computing a
# parallelization value for each test level.
#
# The RSpec files to run are split per test level (see TEST_LEVELS) using
# `Quality::TestLevel#regexp`. For each level, the number of nodes is derived from the
# number of files and the average duration of a test file, taken from the Knapsack report
# when it has data for that level, or from DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS
# otherwise.
#
# The result is written next to the template, at "<pipeline_template_path>.yml".
class GenerateRspecPipeline
  SKIP_PIPELINE_YML_FILE = ".gitlab/ci/_skip.yml"
  TEST_LEVELS = %i[migration background_migration unit integration system].freeze
  MAX_NODES_COUNT = 50 # Maximum parallelization allowed by GitLab

  OPTIMAL_TEST_JOB_DURATION_IN_SECONDS = 600 # 10 MINUTES
  SETUP_DURATION_IN_SECONDS = 180.0 # 3 MINUTES
  OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS = OPTIMAL_TEST_JOB_DURATION_IN_SECONDS - SETUP_DURATION_IN_SECONDS

  # As of 2022-09-01:
  # $ find spec -type f | wc -l
  #  12825
  # and
  # $ find ee/spec -type f | wc -l
  #  5610
  # which gives a total of 18435 test files (`NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE`).
  #
  # Total time to run all tests (based on https://gitlab-org.gitlab.io/rspec_profiling_stats/)
  # is 170183 seconds (`DURATION_OF_THE_TEST_SUITE_IN_SECONDS`).
  #
  # This gives an approximate 170183 / 18435 = 9.2 seconds per test file
  # (`DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS`).
  #
  # If we want each test job to finish in 10 minutes, given we have 3 minutes of setup (`SETUP_DURATION_IN_SECONDS`),
  # then we need to give 7 minutes of testing to each test node (`OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS`).
  # (7 * 60) / 9.2 = 45.6
  #
  # So if we'd want to run the full test suites in 10 minutes (`OPTIMAL_TEST_JOB_DURATION_IN_SECONDS`),
  # we'd need to run at max 45 test file per nodes (`#optimal_test_file_count_per_node_per_test_level`).
  #
  # NOTE: both operands below are Integers, so the default average is 9 (integer division), not 9.2.
  NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE = 18_435
  DURATION_OF_THE_TEST_SUITE_IN_SECONDS = 170_183
  DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS =
    DURATION_OF_THE_TEST_SUITE_IN_SECONDS / NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE

  # pipeline_template_path: A YAML pipeline configuration template to generate the final pipeline config from
  # rspec_files_path: A file containing RSpec files to run, separated by a space
  # knapsack_report_path: A JSON Knapsack report, read as a mapping of test file path to duration
  #
  # Raises ArgumentError when the pipeline template does not exist.
  def initialize(pipeline_template_path:, rspec_files_path: nil, knapsack_report_path: nil)
    @pipeline_template_path = pipeline_template_path.to_s
    @rspec_files_path = rspec_files_path.to_s
    @knapsack_report_path = knapsack_report_path.to_s

    return if File.exist?(@pipeline_template_path)

    raise ArgumentError, "Pipeline template #{@pipeline_template_path.inspect} does not exist"
  end

  # Writes the pipeline configuration to "<pipeline_template_path>.yml".
  #
  # When there are no RSpec files to run, SKIP_PIPELINE_YML_FILE is copied there instead.
  def generate!
    if all_rspec_files.empty?
      info "Using #{SKIP_PIPELINE_YML_FILE} due to no RSpec files to run"
      FileUtils.cp(SKIP_PIPELINE_YML_FILE, pipeline_filename)
      return
    end

    # Render before touching the output file, so that a failure while rendering
    # doesn't leave an empty pipeline file behind.
    pipeline_yaml = ERB.new(File.read(pipeline_template_path)).result_with_hash(**erb_binding)

    # Collapse the blank lines left behind by the ERB control tags.
    File.write(pipeline_filename, pipeline_yaml.squeeze("\n").strip)
  end

  private

  attr_reader :pipeline_template_path, :rspec_files_path, :knapsack_report_path

  def info(text)
    $stdout.puts "[#{self.class.name}] #{text}"
  end

  # Memoized so that the same instance is reused for every file and every test level.
  def test_level_matcher
    @test_level_matcher ||= Quality::TestLevel.new
  end

  # RSpec files to run; empty when the file at `rspec_files_path` doesn't exist.
  def all_rspec_files
    @all_rspec_files ||= File.exist?(rspec_files_path) ? File.read(rspec_files_path).split(' ') : []
  end

  def pipeline_filename
    @pipeline_filename ||= "#{pipeline_template_path}.yml"
  end

  # Local variables made available to the ERB template.
  def erb_binding
    { rspec_files_per_test_level: rspec_files_per_test_level }
  end

  # Returns a Hash of test level => { files: [...], parallelization: Integer }.
  #
  # Test levels are processed in TEST_LEVELS order and a file is assigned to the first
  # level whose regexp matches it; files matching no level are left out.
  def rspec_files_per_test_level
    @rspec_files_per_test_level ||= begin
      all_remaining_rspec_files = all_rspec_files.dup

      TEST_LEVELS.each_with_object(Hash.new { |h, k| h[k] = {} }) do |test_level, memo| # rubocop:disable Rails/IndexWith
        level_regexp = test_level_matcher.regexp(test_level)
        matching_files, all_remaining_rspec_files =
          all_remaining_rspec_files.partition { |file| file.match?(level_regexp) }

        memo[test_level][:files] = matching_files
        memo[test_level][:parallelization] = optimal_nodes_count(test_level, matching_files)
      end
    end
  end

  # Number of nodes to run `rspec_files` on, capped at MAX_NODES_COUNT.
  def optimal_nodes_count(test_level, rspec_files)
    nodes_count = (rspec_files.size / optimal_test_file_count_per_node_per_test_level(test_level)).ceil
    info "Optimal node count for #{rspec_files.size} #{test_level} RSpec files is #{nodes_count}."

    if nodes_count > MAX_NODES_COUNT
      info "We don't want to parallelize to more than #{MAX_NODES_COUNT} jobs for now! " \
           "Decreasing the parallelization to #{MAX_NODES_COUNT}."

      MAX_NODES_COUNT
    else
      nodes_count
    end
  end

  # Number of test files a node can run within OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS, at least 1.
  def optimal_test_file_count_per_node_per_test_level(test_level)
    [
      (OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS / average_test_file_duration_in_seconds_per_test_level[test_level]),
      1
    ].max
  end

  # Returns a Hash of test level => average duration of a test file, in seconds.
  #
  # Averages come from the Knapsack report. A test level without any entry in the report
  # (or every level, when the report is empty) uses DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS,
  # instead of dividing by zero.
  def average_test_file_duration_in_seconds_per_test_level
    @average_test_file_duration_in_seconds_per_test_level ||=
      if knapsack_report.any?
        remaining_knapsack_report = knapsack_report.dup

        TEST_LEVELS.each_with_object({}) do |test_level, memo|
          level_regexp = test_level_matcher.regexp(test_level)
          matching_data_per_test_level =
            remaining_knapsack_report.select { |test_file, _| test_file.match?(level_regexp) }

          # An entry only counts towards the first test level it matches.
          matching_data_per_test_level.each_key { |test_file| remaining_knapsack_report.delete(test_file) }

          memo[test_level] =
            if matching_data_per_test_level.empty?
              DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS
            else
              matching_data_per_test_level.values.sum.fdiv(matching_data_per_test_level.size)
            end
        end
      else
        TEST_LEVELS.each_with_object({}) do |test_level, memo| # rubocop:disable Rails/IndexWith
          memo[test_level] = DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS
        end
      end
  end

  # Parsed Knapsack report; empty when the file doesn't exist or isn't valid JSON.
  def knapsack_report
    @knapsack_report ||=
      begin
        File.exist?(knapsack_report_path) ? JSON.parse(File.read(knapsack_report_path)) : {}
      rescue JSON::ParserError => e
        info "[ERROR] Knapsack report at #{knapsack_report_path} couldn't be parsed! Error:\n#{e}"
        {}
      end
  end
end
# Command-line entry point, only run when this file is executed directly.
#
# Each value-taking switch is stored in `options` under the keyword argument name
# expected by GenerateRspecPipeline#initialize.
if $PROGRAM_NAME == __FILE__
  options = {}

  # Keyword argument name => [short switch, long switch, description], in help order.
  switches = {
    rspec_files_path: [
      "-f", "--rspec-files-path path",
      "Path to a file containing RSpec files to run, " \
      "separated by a space"
    ],
    pipeline_template_path: [
      "-t", "--pipeline-template-path PATH",
      "Path to a YAML pipeline configuration template to " \
      "generate the final pipeline config from"
    ],
    knapsack_report_path: [
      "-k", "--knapsack-report-path path",
      "Path to a Knapsack report"
    ]
  }

  parser = OptionParser.new do |opts|
    switches.each do |option_name, (short, long, description)|
      opts.on(short, long, String, description) { |value| options[option_name] = value }
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      exit
    end
  end

  parser.parse!

  GenerateRspecPipeline.new(**options).generate!
end
# frozen_string_literal: true
require 'fast_spec_helper'
require 'tempfile'
require_relative '../../scripts/generate_rspec_pipeline'
# Specs for GenerateRspecPipeline#generate!, driven by temporary files holding the list of
# RSpec files, the ERB pipeline template and the Knapsack report.
RSpec.describe GenerateRspecPipeline, :silence_stdout, feature_category: :tooling do
  describe '#generate!' do
    let!(:rspec_files) { Tempfile.new(['rspec_files_path', '.txt']) }

    # Two spec files for each of the five test levels.
    let(:rspec_files_content) do
      "spec/migrations/a_spec.rb spec/migrations/b_spec.rb " \
        "spec/lib/gitlab/background_migration/a_spec.rb spec/lib/gitlab/background_migration/b_spec.rb " \
        "spec/models/a_spec.rb spec/models/b_spec.rb " \
        "spec/controllers/a_spec.rb spec/controllers/b_spec.rb " \
        "spec/features/a_spec.rb spec/features/b_spec.rb"
    end

    let(:pipeline_template) { Tempfile.new(['pipeline_template', '.yml.erb']) }

    # One job per test level, emitted only when the level has files; `parallel:` is emitted
    # only when the parallelization is greater than 1.
    #
    # The `parallel:` lines are indented by one space relative to the job names: the expected
    # strings in the examples below rely on that exact indentation.
    let(:pipeline_template_content) do
      <<~YAML
        <% if rspec_files_per_test_level[:migration][:files].size > 0 %>
        rspec migration:
        <% if rspec_files_per_test_level[:migration][:parallelization] > 1 %>
         parallel: <%= rspec_files_per_test_level[:migration][:parallelization] %>
        <% end %>
        <% end %>
        <% if rspec_files_per_test_level[:background_migration][:files].size > 0 %>
        rspec background_migration:
        <% if rspec_files_per_test_level[:background_migration][:parallelization] > 1 %>
         parallel: <%= rspec_files_per_test_level[:background_migration][:parallelization] %>
        <% end %>
        <% end %>
        <% if rspec_files_per_test_level[:unit][:files].size > 0 %>
        rspec unit:
        <% if rspec_files_per_test_level[:unit][:parallelization] > 1 %>
         parallel: <%= rspec_files_per_test_level[:unit][:parallelization] %>
        <% end %>
        <% end %>
        <% if rspec_files_per_test_level[:integration][:files].size > 0 %>
        rspec integration:
        <% if rspec_files_per_test_level[:integration][:parallelization] > 1 %>
         parallel: <%= rspec_files_per_test_level[:integration][:parallelization] %>
        <% end %>
        <% end %>
        <% if rspec_files_per_test_level[:system][:files].size > 0 %>
        rspec system:
        <% if rspec_files_per_test_level[:system][:parallelization] > 1 %>
         parallel: <%= rspec_files_per_test_level[:system][:parallelization] %>
        <% end %>
        <% end %>
      YAML
    end

    let(:knapsack_report) { Tempfile.new(['knapsack_report', '.json']) }

    # Durations in seconds, keyed by spec file path.
    let(:knapsack_report_content) do
      <<~JSON
        {
          "spec/migrations/a_spec.rb": 360.3,
          "spec/migrations/b_spec.rb": 180.1,
          "spec/lib/gitlab/background_migration/a_spec.rb": 60.5,
          "spec/lib/gitlab/background_migration/b_spec.rb": 180.3,
          "spec/models/a_spec.rb": 360.2,
          "spec/models/b_spec.rb": 180.6,
          "spec/controllers/a_spec.rb": 60.2,
          "spec/controllers/ab_spec.rb": 180.4,
          "spec/features/a_spec.rb": 360.1,
          "spec/features/b_spec.rb": 180.5
        }
      JSON
    end

    # Fills the temporary files before each example and removes them afterwards.
    around do |example|
      rspec_files.write(rspec_files_content)
      rspec_files.rewind
      pipeline_template.write(pipeline_template_content)
      pipeline_template.rewind
      knapsack_report.write(knapsack_report_content)
      knapsack_report.rewind

      example.run
    ensure
      rspec_files.close
      rspec_files.unlink
      pipeline_template.close
      pipeline_template.unlink
      knapsack_report.close
      knapsack_report.unlink
    end

    context 'when rspec_files and pipeline_template_path exists' do
      subject do
        described_class.new(
          rspec_files_path: rspec_files.path,
          pipeline_template_path: pipeline_template.path
        )
      end

      it 'generates the pipeline config with default parallelization' do
        subject.generate!

        expect(File.read("#{pipeline_template.path}.yml"))
          .to eq(
            "rspec migration:\nrspec background_migration:\nrspec unit:\n" \
            "rspec integration:\nrspec system:"
          )
      end

      context 'when parallelization > 0' do
        before do
          stub_const("#{described_class}::DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS", 360)
        end

        it 'generates the pipeline config' do
          subject.generate!

          expect(File.read("#{pipeline_template.path}.yml"))
            .to eq(
              "rspec migration:\n parallel: 2\nrspec background_migration:\n parallel: 2\n" \
              "rspec unit:\n parallel: 2\nrspec integration:\n parallel: 2\n" \
              "rspec system:\n parallel: 2"
            )
        end
      end

      context 'when parallelization > MAX_NODES_COUNT' do
        let(:rspec_files_content) do
          Array.new(51) { |i| "spec/migrations/#{i}_spec.rb" }.join(' ')
        end

        before do
          # With an average file duration equal to the whole job duration, each node gets one file.
          stub_const(
            "#{described_class}::DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS",
            described_class::OPTIMAL_TEST_JOB_DURATION_IN_SECONDS
          )
        end

        it 'generates the pipeline config with max parallelization of 50' do
          subject.generate!

          expect(File.read("#{pipeline_template.path}.yml")).to eq("rspec migration:\n parallel: 50")
        end
      end
    end

    context 'when knapsack_report_path is given' do
      subject do
        described_class.new(
          rspec_files_path: rspec_files.path,
          pipeline_template_path: pipeline_template.path,
          knapsack_report_path: knapsack_report.path
        )
      end

      it 'generates the pipeline config with parallelization based on Knapsack' do
        subject.generate!

        expect(File.read("#{pipeline_template.path}.yml"))
          .to eq(
            "rspec migration:\n parallel: 2\nrspec background_migration:\n" \
            "rspec unit:\n parallel: 2\nrspec integration:\n" \
            "rspec system:\n parallel: 2"
          )
      end

      context 'and Knapsack report does not contain valid JSON' do
        # A trailing comma after the closing brace makes the report unparsable.
        let(:knapsack_report_content) { "#{super()}," }

        it 'generates the pipeline config with default parallelization' do
          subject.generate!

          expect(File.read("#{pipeline_template.path}.yml"))
            .to eq(
              "rspec migration:\nrspec background_migration:\nrspec unit:\n" \
              "rspec integration:\nrspec system:"
            )
        end
      end
    end

    context 'when rspec_files does not exist' do
      subject { described_class.new(rspec_files_path: nil, pipeline_template_path: pipeline_template.path) }

      it 'generates the pipeline config using the no-op template' do
        subject.generate!

        expect(File.read("#{pipeline_template.path}.yml")).to include("no-op:")
      end
    end

    context 'when pipeline_template_path does not exist' do
      subject { described_class.new(rspec_files_path: rspec_files.path, pipeline_template_path: nil) }

      it 'raises an ArgumentError' do
        expect { subject }.to raise_error(ArgumentError)
      end
    end
  end
end
0% 加载中 .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册