-
由 Rémy Coutable 创作于
Instead of using the average test file duration for a given test level type, we now use the duration of the actual test files that will run in the predictive jobs. That way, if we only run very slow test files, the computation will take it into account and parallelize on more jobs. Signed-off-by:
Rémy Coutable <remy@rymai.me>
由 Rémy Coutable 创作于
Instead of using the average test file duration for a given test level type, we now use the duration of the actual test files that will run in the predictive jobs. That way, if we only run very slow test files, the computation will take it into account and parallelize on more jobs. Signed-off-by:
Rémy Coutable <remy@rymai.me>
代码所有者
将用户和群组指定为特定文件更改的核准人。 了解更多。
generate_rspec_pipeline.rb 8.30 KiB
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'optparse'
require 'json'
require 'fileutils'
require 'erb'
require_relative '../tooling/quality/test_level'
# Generates the RSpec test child pipeline configuration from an ERB template, with a job
# parallelization computed for each test level from the RSpec files to run and, when one is
# available, from the per-file durations of a Knapsack report.
class GenerateRspecPipeline
  SKIP_PIPELINE_YML_FILE = ".gitlab/ci/_skip.yml"
  TEST_LEVELS = %i[migration background_migration unit integration system].freeze
  MAX_NODES_COUNT = 50 # Maximum parallelization allowed by GitLab

  OPTIMAL_TEST_JOB_DURATION_IN_SECONDS = 600 # 10 MINUTES
  SETUP_DURATION_IN_SECONDS = 180.0 # 3 MINUTES
  OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS = OPTIMAL_TEST_JOB_DURATION_IN_SECONDS - SETUP_DURATION_IN_SECONDS

  # As of 2022-09-01:
  #   $ find spec -type f | wc -l
  #   12825
  # and
  #   $ find ee/spec -type f | wc -l
  #   5610
  # which gives a total of 18435 test files (`NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE`).
  #
  # Total time to run all tests (based on https://gitlab-org.gitlab.io/rspec_profiling_stats/)
  # is 170183 seconds (`DURATION_OF_THE_TEST_SUITE_IN_SECONDS`).
  #
  # This gives approximately 170183 / 18435 = 9.2 seconds per test file
  # (`DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS`).
  #
  # If we want each test job to finish in 10 minutes (`OPTIMAL_TEST_JOB_DURATION_IN_SECONDS`), given
  # we have 3 minutes of setup (`SETUP_DURATION_IN_SECONDS`), then we can give 7 minutes of testing
  # to each test node (`OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS`):
  #   (7 * 60) / 9.2 = 45.6
  #
  # So to run the full test suite in 10 minutes, each node should run at most 45 test files
  # (`#optimal_test_file_count_per_node_per_test_level`).
  NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE = 18_435
  DURATION_OF_THE_TEST_SUITE_IN_SECONDS = 170_183
  # `fdiv` keeps the fractional part; a plain Integer `/` would truncate the documented ~9.2 to 9.
  DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS =
    DURATION_OF_THE_TEST_SUITE_IN_SECONDS.fdiv(NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE)

  # pipeline_template_path: A YAML pipeline configuration template to generate the final pipeline config from
  # rspec_files_path: A file containing RSpec files to run, separated by a space
  # knapsack_report_path: A file containing a Knapsack report
  # test_suite_prefix: An optional test suite folder prefix (e.g. `ee/` or `jh/`)
  # generated_pipeline_path: An optional filename where to write the pipeline config (defaults to
  #                          `"#{pipeline_template_path}.yml"`)
  #
  # Raises ArgumentError when the pipeline template file does not exist.
  def initialize(
    pipeline_template_path:, rspec_files_path: nil, knapsack_report_path: nil, test_suite_prefix: nil,
    generated_pipeline_path: nil)
    @pipeline_template_path = pipeline_template_path.to_s
    @rspec_files_path = rspec_files_path.to_s
    @knapsack_report_path = knapsack_report_path.to_s
    @test_suite_prefix = test_suite_prefix
    @generated_pipeline_path = generated_pipeline_path || "#{pipeline_template_path}.yml"

    return if File.exist?(@pipeline_template_path)

    raise ArgumentError, "Pipeline template not found: #{@pipeline_template_path.inspect}"
  end

  # Writes the pipeline configuration to `generated_pipeline_path`.
  #
  # When there are no RSpec files to run, `SKIP_PIPELINE_YML_FILE` is copied instead of
  # rendering the template.
  def generate!
    if all_rspec_files.empty?
      info "Using #{SKIP_PIPELINE_YML_FILE} due to no RSpec files to run"
      FileUtils.cp(SKIP_PIPELINE_YML_FILE, generated_pipeline_path)
      return
    end

    info "pipeline_template_path: #{pipeline_template_path}"
    info "generated_pipeline_path: #{generated_pipeline_path}"

    # Render before touching the output file, so a failure while rendering does not leave a
    # truncated file behind.
    pipeline_yaml = ERB.new(File.read(pipeline_template_path)).result_with_hash(erb_binding)
    File.write(generated_pipeline_path, pipeline_yaml.squeeze("\n").strip)
  end

  private

  attr_reader :pipeline_template_path, :rspec_files_path, :knapsack_report_path, :test_suite_prefix,
    :generated_pipeline_path

  # Prints a message to stdout, prefixed with the class name.
  def info(text)
    $stdout.puts "[#{self.class.name}] #{text}"
  end

  # Whitespace-separated RSpec file paths read from `rspec_files_path`; empty when the file is missing.
  def all_rspec_files
    @all_rspec_files ||= File.exist?(rspec_files_path) ? File.read(rspec_files_path).split(' ') : []
  end

  # Local variables exposed to the ERB pipeline template.
  def erb_binding
    {
      rspec_files_per_test_level: rspec_files_per_test_level,
      test_suite_prefix: test_suite_prefix
    }
  end

  # Buckets the RSpec files by test level, following the order of `TEST_LEVELS`: a file is assigned
  # to the first level whose regexp matches it and is not considered for the following levels.
  #
  # Returns a Hash of `test_level => { files: [...], parallelization: Integer }`.
  def rspec_files_per_test_level
    @rspec_files_per_test_level ||= begin
      remaining_files = all_rspec_files

      TEST_LEVELS.each_with_object(Hash.new { |h, k| h[k] = {} }) do |test_level, memo|
        level_regexp = test_level_service.regexp(test_level, true)
        # Single pass per level instead of deleting the matched files one by one.
        level_files, remaining_files = remaining_files.partition { |file| file.match?(level_regexp) }

        memo[test_level][:files] = level_files
        memo[test_level][:parallelization] = optimal_nodes_count(test_level, level_files)
      end
    end
  end

  # Number of parallel jobs to use for the given files, capped at `MAX_NODES_COUNT`.
  # Returns 0 only when there are no files to run.
  def optimal_nodes_count(test_level, rspec_files)
    files_per_node = optimal_test_file_count_per_node_per_test_level(test_level, rspec_files)
    nodes_count = (rspec_files.size / files_per_node).ceil
    # A zero average duration makes `files_per_node` infinite and the division 0.0, but files
    # that exist still need one job to run on.
    nodes_count = 1 if nodes_count.zero? && rspec_files.any?

    info "Optimal node count for #{rspec_files.size} #{test_level} RSpec files is #{nodes_count}."

    if nodes_count > MAX_NODES_COUNT
      info "We don't want to parallelize to more than #{MAX_NODES_COUNT} jobs for now! " \
           "Decreasing the parallelization to #{MAX_NODES_COUNT}."

      MAX_NODES_COUNT
    else
      nodes_count
    end
  end

  # How many of the given files fit in the optimal test runtime of one node (at least 1).
  def optimal_test_file_count_per_node_per_test_level(test_level, rspec_files)
    [
      (OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS / average_test_file_duration(test_level, rspec_files)),
      1
    ].max
  end

  # Average duration of the given files according to the Knapsack report. Files missing from the
  # report count for the average duration of their test level. Without files or without a report,
  # the test level average is returned.
  def average_test_file_duration(test_level, rspec_files)
    level_average = average_test_file_duration_per_test_level[test_level]
    return level_average if rspec_files.empty? || knapsack_report.empty?

    # `fdiv` so that a report holding Integer durations is not truncated.
    rspec_files.sum { |rspec_file| knapsack_report.fetch(rspec_file, level_average) }.fdiv(rspec_files.size)
  end

  # Average file duration for each test level, computed from the Knapsack report entries matching
  # the level (each entry is attributed to the first matching level of `TEST_LEVELS`).
  # Levels without any entry, including all levels when the report is empty, use
  # `DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS`.
  def average_test_file_duration_per_test_level
    @average_test_file_duration_per_test_level ||= begin
      remaining_entries = knapsack_report.to_a

      TEST_LEVELS.each_with_object({}) do |test_level, memo|
        level_regexp = test_level_service.regexp(test_level, true)
        level_entries, remaining_entries =
          remaining_entries.partition { |test_file, _| test_file.match?(level_regexp) }

        memo[test_level] =
          if level_entries.empty?
            DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS
          else
            level_entries.sum { |_, duration| duration }.fdiv(level_entries.size)
          end
      end
    end
  end

  # Parsed Knapsack report; an empty Hash when the file is missing or is not valid JSON.
  # NOTE(review): presumably a Hash of `test file path => duration in seconds` - confirm against
  # the report producer.
  def knapsack_report
    @knapsack_report ||=
      begin
        File.exist?(knapsack_report_path) ? JSON.parse(File.read(knapsack_report_path)) : {}
      rescue JSON::ParserError => e
        info "[ERROR] Knapsack report at #{knapsack_report_path} couldn't be parsed! Error:\n#{e}"
        {}
      end
  end

  # Provides the regexp used to match test files against a test level.
  def test_level_service
    @test_level_service ||= Quality::TestLevel.new(test_suite_prefix)
  end
end
# Command-line entry point, only run when this file is executed directly: parses the options
# below into keyword arguments for GenerateRspecPipeline and generates the pipeline.
if $PROGRAM_NAME == __FILE__
  cli_options = {}

  # Keyword argument => [short switch, long switch, description], in help display order.
  string_switches = {
    rspec_files_path: [
      "-f", "--rspec-files-path path",
      "Path to a file containing RSpec files to run, " \
      "separated by a space"
    ],
    pipeline_template_path: [
      "-t", "--pipeline-template-path PATH",
      "Path to a YAML pipeline configuration template to " \
      "generate the final pipeline config from"
    ],
    knapsack_report_path: [
      "-k", "--knapsack-report-path path",
      "Path to a Knapsack report"
    ],
    test_suite_prefix: [
      "-p", "--test-suite-prefix test_suite_prefix",
      "Test suite folder prefix"
    ],
    generated_pipeline_path: [
      "-o", "--generated-pipeline-path generated_pipeline_path",
      "Path where to write the pipeline " \
      "config"
    ]
  }

  parser = OptionParser.new

  string_switches.each do |keyword, (short_switch, long_switch, description)|
    parser.on(short_switch, long_switch, String, description) { |value| cli_options[keyword] = value }
  end

  parser.on("-h", "--help", "Prints this help") do
    puts parser
    exit
  end

  parser.parse!

  GenerateRspecPipeline.new(**cli_options).generate!
end