0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00
mongodb/buildscripts/selected_tests.py

518 lines
21 KiB
Python

#!/usr/bin/env python3
"""Command line utility for determining what jstests should run for the given changed files."""
import os
import re
import sys
from datetime import datetime, timedelta
from functools import partial
from typing import Any, Dict, List, Set, Optional
import click
import inject
import structlog
from pydantic import BaseModel
from structlog.stdlib import LoggerFactory
from git import Repo
from evergreen.api import EvergreenApi, RetryingEvergreenApi
if __name__ == "__main__" and __package__ is None:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# pylint: disable=wrong-import-position
# Get relative imports to work when the package is not installed on the PYTHONPATH.
from buildscripts.patch_builds.change_data import find_changed_files_in_repos
from buildscripts.patch_builds.evg_change_data import generate_revision_map_from_manifest
from buildscripts.patch_builds.selected_tests.selected_tests_client import SelectedTestsClient
from buildscripts.task_generation.evg_config_builder import EvgConfigBuilder
from buildscripts.task_generation.gen_config import GenerationConfiguration
from buildscripts.task_generation.generated_config import GeneratedConfiguration
from buildscripts.task_generation.resmoke_proxy import ResmokeProxyConfig
from buildscripts.task_generation.suite_split import SuiteSplitParameters, SuiteSplitConfig
from buildscripts.task_generation.suite_split_strategies import SplitStrategy, FallbackStrategy, \
greedy_division, round_robin_fallback
from buildscripts.task_generation.task_types.gentask_options import GenTaskOptions
from buildscripts.task_generation.task_types.resmoke_tasks import ResmokeGenTaskParams
from buildscripts.util.cmdutils import enable_logging
from buildscripts.util.fileops import read_yaml_file
from buildscripts.burn_in_tests import DEFAULT_REPO_LOCATIONS, create_task_list_for_tests, \
TaskInfo
from buildscripts.ciconfig.evergreen import (
EvergreenProjectConfig,
ResmokeArgs,
Task,
parse_evergreen_file,
Variant,
)
from buildscripts.patch_builds.selected_tests.selected_tests_service import SelectedTestsService
# Route structlog loggers through the standard-library logging factory.
structlog.configure(logger_factory=LoggerFactory())
LOGGER = structlog.getLogger(__name__)
# Directory handed to ResmokeProxyConfig as ``resmoke_suite_dir`` in main().
DEFAULT_TEST_SUITE_DIR = os.path.join("buildscripts", "resmokeconfig", "suites")
# NOTE(review): not referenced in the visible part of this module; presumably the
# name of the Evergreen expansion that carries the task id -- confirm before removing.
TASK_ID_EXPANSION = "task_id"
# Evergreen project definition parsed by main() via parse_evergreen_file().
EVERGREEN_FILE = "etc/evergreen.yml"
# Default value of the --evg-api-config command line option.
EVG_CONFIG_FILE = ".evergreen.yml"
# Directory the generated configuration is written to; also passed to GenTaskOptions.
SELECTED_TESTS_CONFIG_DIR = "generated_resmoke_config"
# NOTE(review): not referenced in the visible part of this module -- confirm usage.
RELATION_THRESHOLD = 0
# main() uses a history window that starts this many days before "now".
LOOKBACK_DURATION_DAYS = 14
# Task-name patterns. _exclude_task() applies them with ``.match()``, so each pattern
# is anchored at the start of the task name (hence the leading ".*" on some of them).
COMPILE_TASK_PATTERN = re.compile(".*compile.*")
CONCURRENCY_TASK_PATTERN = re.compile("concurrency.*")
INTEGRATION_TASK_PATTERN = re.compile("integration.*")
FUZZER_TASK_PATTERN = re.compile(".*fuzz.*")
GENERATE_TASK_PATTERN = re.compile("burn_in.*")
MULTIVERSION_TASK_PATTERN = re.compile(".*multiversion.*")
LINT_TASK_PATTERN = re.compile("lint.*")
STITCH_TASK_PATTERN = re.compile("stitch.*")
# Any task whose name matches one of these patterns is excluded by _exclude_task().
EXCLUDE_TASK_PATTERNS = [
COMPILE_TASK_PATTERN, CONCURRENCY_TASK_PATTERN, INTEGRATION_TASK_PATTERN, FUZZER_TASK_PATTERN,
GENERATE_TASK_PATTERN, LINT_TASK_PATTERN, MULTIVERSION_TASK_PATTERN, STITCH_TASK_PATTERN
]
# Exact task names grouped by category; all of them end up in EXCLUDE_TASK_LIST below.
CPP_TASK_NAMES = [
"dbtest",
"idl_tests",
"run_unittests",
]
PUBLISH_TASK_NAMES = [
"package",
"publish_packages",
"push",
]
PYTHON_TESTS = ["buildscripts_test"]
# Tasks excluded by exact name (checked by _exclude_task() before the patterns).
EXCLUDE_TASK_LIST = [
*CPP_TASK_NAMES,
*PYTHON_TESTS,
*PUBLISH_TASK_NAMES,
]
# Evergreen function names that TaskConfigService.get_evg_task_config() searches a task
# for, in this order; the "vars" of the first one found describe how the task runs.
POSSIBLE_RUN_TASK_FUNCS = [
"generate resmoke tasks",
"generate randomized multiversion tasks",
"run tests",
"generate explicit multiversion tasks",
]
class EvgExpansions(BaseModel):
    """
    Subset of the Evergreen expansions that selected tests relies on.

    task_id: ID of the running task.
    task_name: Name of the running task.
    build_variant: Name of the build variant the task runs on.
    build_id: ID of the running build.
    is_patch: Whether the task belongs to a patch build (may be absent).
    project: Evergreen project the task belongs to.
    revision: git revision the task runs against.
    version_id: ID of the running version.
    """

    task_id: str
    task_name: str
    build_variant: str
    build_id: str
    is_patch: Optional[bool] = None
    project: str
    revision: str
    version_id: str

    @classmethod
    def from_yaml_file(cls, path: str) -> "EvgExpansions":
        """Create the expansions from the YAML file at the given path."""
        expansions = read_yaml_file(path)
        return cls(**expansions)

    def build_gen_task_options(self) -> GenTaskOptions:
        """Create the options used when generating tasks."""
        # A missing (None) expansion is treated the same as "not a patch build".
        running_in_patch = self.is_patch if self.is_patch else False
        return GenTaskOptions(
            create_misc_suite=False,
            generated_config_dir=SELECTED_TESTS_CONFIG_DIR,
            is_patch=running_in_patch,
            use_default_timeouts=False,
        )

    def build_suite_split_config(self, start_date: datetime,
                                 end_date: datetime) -> SuiteSplitConfig:
        """
        Create the options used to divide a suite into sub-suites.

        :param start_date: Beginning of the window of historic results to consider.
        :param end_date: End of the window of historic results to consider.
        :return: Options for splitting suites.
        """
        split_options = {
            "evg_project": self.project,
            "target_resmoke_time": 60,
            "max_sub_suites": 5,
            "max_tests_per_suite": 100,
            "start_date": start_date,
            "end_date": end_date,
            "include_build_variant_in_name": True,
        }
        return SuiteSplitConfig(**split_options)

    def get_config_location(self) -> str:
        """Return the location under which the generated configuration is stored."""
        directory = f"{self.build_variant}/{self.revision}/generate_tasks"
        archive_name = f"{self.task_name}-{self.build_id}.tgz"
        return f"{directory}/{archive_name}"
class TaskConfigService:
    """Service for generating selected tests task configuration."""

    @staticmethod
    def get_evg_task_config(task: Task, build_variant_config: Variant) -> Dict[str, Any]:
        """
        Look up task config of the task to be generated.

        The variables of the first function from POSSIBLE_RUN_TASK_FUNCS that the task
        uses are taken as the starting point. They are copied before being adjusted so
        that the parsed Evergreen configuration, which is shared between build variants,
        is never modified.

        :param task: Task to get info for.
        :param build_variant_config: Config of build variant to collect task info from.
        :return: Task configuration values.
        :raises KeyError: If no "resmoke_args" variable could be found for the task.
        """
        LOGGER.info("Calculating evg_task_config values for task", task=task.name)
        task_vars: Dict[str, Any] = {}
        for run_task_func in POSSIBLE_RUN_TASK_FUNCS:
            task_def = task.find_func_command(run_task_func)
            if task_def:
                # Shallow copy: the updates below must not leak back into the
                # project configuration owned by the caller.
                task_vars = dict(task_def["vars"])
                break

        resmoke_args = task_vars["resmoke_args"]
        suite_name = ResmokeArgs.get_arg(resmoke_args, "suites")
        if suite_name:
            task_vars["suite"] = suite_name

        # the suites argument will run all tests in a suite even when individual
        # tests are specified in resmoke_args, so we remove it
        task_vars["resmoke_args"] = ResmokeArgs.remove_arg(resmoke_args, "suites")

        # Generator tasks are named "<task>_gen"; report the name of the real task.
        task_name = task.name[:-4] if task.name.endswith("_gen") else task.name
        return {
            "task_name": task_name,
            "build_variant": build_variant_config.name,
            **task_vars,
            "large_distro_name": build_variant_config.expansion("large_distro_name"),
        }

    def get_task_configs_for_test_mappings(self, tests_by_task: Dict[str, TaskInfo],
                                           build_variant_config: Variant) -> Dict[str, dict]:
        """
        For test mappings, generate a dict containing task names and their config settings.

        Tasks that cannot be found on the build variant, or that are excluded, are skipped.

        :param tests_by_task: Dictionary of tests and tasks to run.
        :param build_variant_config: Config of build variant to collect task info from.
        :return: Dict of task names and their config settings.
        """
        evg_task_configs = {}
        for task_name, test_list_info in tests_by_task.items():
            task = _find_task(build_variant_config, task_name)
            if task and not _exclude_task(task):
                evg_task_config = self.get_evg_task_config(task, build_variant_config)
                # Restrict the generated task to the tests related to the change.
                evg_task_config["selected_tests_to_run"] = set(test_list_info.tests)
                evg_task_configs[task.name] = evg_task_config
        return evg_task_configs

    def get_task_configs_for_task_mappings(self, related_tasks: List[str],
                                           build_variant_config: Variant) -> Dict[str, dict]:
        """
        For task mappings, generate a dict containing task names and their config settings.

        Tasks that cannot be found on the build variant, or that are excluded, are skipped.

        :param related_tasks: List of tasks to run.
        :param build_variant_config: Config of build variant to collect task info from.
        :return: Dict of task names and their config settings.
        """
        evg_task_configs = {}
        for task_name in related_tasks:
            task = _find_task(build_variant_config, task_name)
            if task and not _exclude_task(task):
                evg_task_configs[task.name] = self.get_evg_task_config(
                    task, build_variant_config)
        return evg_task_configs
def _exclude_task(task: Task) -> bool:
    """
    Decide whether the given task must be left out of the analysis.

    A task is left out when its name is listed in EXCLUDE_TASK_LIST or when one of
    the EXCLUDE_TASK_PATTERNS matches the beginning of its name.

    :param task: Task to inspect.
    :return: True if this task should be excluded.
    """
    name = task.name
    excluded = name in EXCLUDE_TASK_LIST
    if not excluded:
        # Only consult the patterns when the exact-name list did not already decide.
        excluded = any(pattern.match(name) for pattern in EXCLUDE_TASK_PATTERNS)
    if excluded:
        LOGGER.debug("Excluding task from analysis because it is not a jstest", task=name)
    return excluded
def _find_task(build_variant_config: Variant, task_name: str) -> Task:
    """
    Fetch the task with the given name from the build variant.

    If the plain name yields nothing, the lookup is repeated with a "_gen" suffix.

    :param build_variant_config: Config of build variant to collect task info from.
    :param task_name: Name of task to get info for.
    :return: Task configuration; the result of the second lookup when the first one
        found nothing (callers treat a falsy result as "no such task").
    """
    return (build_variant_config.get_task(task_name)
            or build_variant_config.get_task(f"{task_name}_gen"))
def _remove_repo_path_prefix(file_path: str) -> str:
"""
Remove the repo path prefix from the filepath.
:param file_path: Path of the changed file.
:return: Path of the changed file without prefix.
"""
for repo_path in DEFAULT_REPO_LOCATIONS:
if repo_path != ".":
if repo_path.startswith("./"):
repo_path = repo_path[2:]
file_path = re.sub(repo_path + "/", '', file_path)
return file_path
def filter_set(item: str, input_set: Set[str]) -> bool:
    """
    Membership predicate: report whether the item belongs to the given set.

    :param item: Item to look up.
    :param input_set: Set to look the item up in.
    :return: True if the item is a member of the set.
    """
    is_member = item in input_set
    return is_member
class SelectedTestsOrchestrator:
    """Coordinates the generation of selected-tests builds."""

    # pylint: disable=too-many-arguments
    @inject.autoparams()
    def __init__(self, evg_api: EvergreenApi, evg_conf: EvergreenProjectConfig,
                 selected_tests_service: SelectedTestsService,
                 task_config_service: TaskConfigService, evg_expansions: EvgExpansions) -> None:
        """
        Store the collaborators used by the orchestrator.

        :param evg_api: Evergreen API client.
        :param evg_conf: Evergreen Project configuration.
        :param selected_tests_service: Selected tests service.
        :param task_config_service: Task Config service.
        :param evg_expansions: Evergreen expansions.
        """
        self.evg_expansions = evg_expansions
        self.task_config_service = task_config_service
        self.selected_tests_service = selected_tests_service
        self.evg_conf = evg_conf
        self.evg_api = evg_api

    def find_changed_files(self, repos: List[Repo], task_id: str) -> Set[str]:
        """
        Collect the files that changed in the given repos.

        Repo path prefixes are stripped and anything under "src/third_party" is dropped.

        :param repos: List of git repos to query.
        :param task_id: ID of task being run.
        :return: Set of files that contain changes.
        """
        revision_map = generate_revision_map_from_manifest(repos, task_id, self.evg_api)
        normalized_paths = {
            _remove_repo_path_prefix(raw_path)
            for raw_path in find_changed_files_in_repos(repos, revision_map)
        }
        changed_files = {
            path
            for path in normalized_paths if not path.startswith("src/third_party")
        }
        LOGGER.info("Found changed files", files=changed_files)
        return changed_files

    def get_task_config(self, build_variant_config: Variant,
                        changed_files: Set[str]) -> Dict[str, Dict]:
        """
        Build the configurations of the tasks that should be generated.

        Tasks that already exist in the build for this variant are left out.

        :param build_variant_config: Config of build variant to collect task info from.
        :param changed_files: Set of changed_files.
        :return: Task configurations.
        """
        variant_name = build_variant_config.name
        existing_tasks = self.get_existing_tasks(self.evg_expansions.version_id, variant_name)
        task_configs = {}

        # Step 1: tasks derived from test files related to the change.
        related_test_files = self.selected_tests_service.find_selected_test_files(changed_files)
        LOGGER.info("related test files found", related_test_files=related_test_files,
                    variant=variant_name)
        if related_test_files:
            all_tests_by_task = create_task_list_for_tests(related_test_files, variant_name,
                                                           self.evg_conf)
            LOGGER.info("tests and tasks found", tests_by_task=all_tests_by_task)
            new_tests_by_task = {
                name: info
                for name, info in all_tests_by_task.items() if name not in existing_tasks
            }
            task_configs.update(
                self.task_config_service.get_task_configs_for_test_mappings(
                    new_tests_by_task, build_variant_config))

        # Step 2: tasks directly related to the change.
        found_tasks = self.selected_tests_service.find_selected_tasks(changed_files)
        LOGGER.info("related tasks found", related_tasks=found_tasks, variant=variant_name)
        new_tasks = {name for name in found_tasks if name not in existing_tasks}
        if new_tasks:
            # Entries from task mappings replace entries from test mappings: a task
            # mapping runs every test of the task rather than a subset, and running
            # all tests is the safer choice.
            task_configs.update(
                self.task_config_service.get_task_configs_for_task_mappings(
                    list(new_tasks), build_variant_config))

        return task_configs

    def get_existing_tasks(self, version_id: str, build_variant: str) -> Set[str]:
        """
        Collect the display names of the tasks already present in the given build.

        :param version_id: ID of version to query.
        :param build_variant: Name of build variant to query.
        :return: Set of task names that already exist in the specified build.
        """
        version = self.evg_api.version_by_id(version_id)
        try:
            build = version.build_by_variant(build_variant)
        except KeyError:
            LOGGER.debug("No build exists on this build variant for this version yet",
                         variant=build_variant)
            return set()

        if not build:
            return set()
        return {existing_task.display_name for existing_task in build.get_tasks()}

    def generate_build_variant(self, build_variant_config: Variant, changed_files: Set[str],
                               builder: EvgConfigBuilder) -> None:
        """
        Add the selected tasks of one build variant to the builder.

        :param build_variant_config: Configuration of build variant to generate.
        :param changed_files: List of file changes to determine what to run.
        :param builder: Builder to create new configuration.
        """
        build_variant_name = build_variant_config.name
        LOGGER.info("Generating build variant", build_variant=build_variant_name)

        for task_config in self.get_task_config(build_variant_config, changed_files).values():
            # Only configurations coming from test mappings restrict the tests to run.
            test_filter = (partial(filter_set, input_set=task_config["selected_tests_to_run"])
                           if "selected_tests_to_run" in task_config else None)
            # Fall back to the task name when no suite was recorded for the task.
            suite = task_config.get("suite", task_config["task_name"])

            split_params = SuiteSplitParameters(
                build_variant=build_variant_name,
                task_name=task_config["task_name"],
                suite_name=suite,
                filename=suite,
                test_file_filter=test_filter,
                is_asan=build_variant_config.is_asan_build(),
            )
            gen_params = ResmokeGenTaskParams(
                use_large_distro=task_config.get("use_large_distro", False),
                large_distro_name=task_config.get("large_distro_name"),
                require_multiversion=task_config.get("require_multiversion"),
                repeat_suites=task_config.get("repeat_suites", 1),
                resmoke_args=task_config["resmoke_args"],
                resmoke_jobs_max=task_config.get("resmoke_jobs_max"),
                config_location=self.evg_expansions.get_config_location(),
            )
            builder.generate_suite(split_params, gen_params)

    def generate(self, repos: List[Repo], task_id: str) -> None:
        """
        Create the selected tests configuration and write it to disk.

        :param repos: List of git repos containing changes to check.
        :param task_id: ID of task being run.
        """
        changed_files = self.find_changed_files(repos, task_id)
        self.generate_version(changed_files).write_all_to_dir(SELECTED_TESTS_CONFIG_DIR)

    def generate_version(self, changed_files: Set[str]) -> GeneratedConfiguration:
        """
        Create the selected tests configuration for the given file changes.

        :param changed_files: Set of files that contain changes.
        :return: Configuration to generate selected-tests tasks.
        """
        builder = EvgConfigBuilder()  # pylint: disable=no-value-for-parameter
        for required_variant in self.evg_conf.get_required_variants():
            self.generate_build_variant(required_variant, changed_files, builder)
        return builder.build("selected_tests_config.json")
@click.command()
@click.option("--verbose", "verbose", default=False, is_flag=True, help="Enable extra logging.")
@click.option(
    "--expansion-file",
    "expansion_file",
    type=str,
    required=True,
    help="Location of expansions file generated by evergreen.",
)
@click.option(
    "--evg-api-config",
    "evg_api_config",
    default=EVG_CONFIG_FILE,
    metavar="FILE",
    help="Configuration file with connection info for Evergreen API.",
)
@click.option(
    "--selected-tests-config",
    "selected_tests_config",
    required=True,
    metavar="FILE",
    help="Configuration file with connection info for selected tests service.",
)
def main(
        verbose: bool,
        expansion_file: str,
        evg_api_config: str,
        selected_tests_config: str,
):
    """
    Select tasks to be run based on changed files in a patch build.

    :param verbose: Log extra debug information.
    :param expansion_file: Configuration file.
    :param evg_api_config: Location of configuration file to connect to evergreen.
    :param selected_tests_config: Location of config file to connect to selected-tests service.
    """
    enable_logging(verbose)

    # Window of historic results: the last LOOKBACK_DURATION_DAYS days, ending now.
    end_date = datetime.utcnow().replace(microsecond=0)
    start_date = end_date - timedelta(days=LOOKBACK_DURATION_DAYS)

    evg_expansions = EvgExpansions.from_yaml_file(expansion_file)

    def dependencies(binder: inject.Binder) -> None:
        """Bind every dependency that is resolved through ``inject``."""
        binder.bind(EvgExpansions, evg_expansions)
        binder.bind(EvergreenApi, RetryingEvergreenApi.get_api(config_file=evg_api_config))
        binder.bind(EvergreenProjectConfig, parse_evergreen_file(EVERGREEN_FILE))
        binder.bind(SelectedTestsClient, SelectedTestsClient.from_file(selected_tests_config))
        binder.bind(SuiteSplitConfig, evg_expansions.build_suite_split_config(start_date, end_date))
        binder.bind(SplitStrategy, greedy_division)
        binder.bind(FallbackStrategy, round_robin_fallback)
        binder.bind(GenTaskOptions, evg_expansions.build_gen_task_options())
        binder.bind(GenerationConfiguration, GenerationConfiguration.from_yaml_file())
        binder.bind(ResmokeProxyConfig,
                    ResmokeProxyConfig(resmoke_suite_dir=DEFAULT_TEST_SUITE_DIR))

    inject.configure(dependencies)

    # Only consider repo locations that are actually checked out.
    repos = [Repo(x) for x in DEFAULT_REPO_LOCATIONS if os.path.isdir(x)]
    selected_tests = SelectedTestsOrchestrator()  # pylint: disable=no-value-for-parameter
    selected_tests.generate(repos, evg_expansions.task_id)


if __name__ == "__main__":
    main()  # pylint: disable=no-value-for-parameter