0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-11-21 12:39:08 +01:00

SERVER-71473: Begin using the new test stats location

This commit is contained in:
Jeff Zambory 2022-11-22 19:22:39 +00:00 committed by Evergreen Agent
parent 95bb1d8a0f
commit 81b9c1abbc
7 changed files with 130 additions and 118 deletions

View File

@ -7,7 +7,7 @@ import math
import os
import shlex
import sys
from datetime import datetime, timedelta
from datetime import timedelta
from pathlib import Path
from typing import Dict, List, Optional
@ -19,7 +19,7 @@ from evergreen import EvergreenApi, RetryingEvergreenApi
from buildscripts.ciconfig.evergreen import (EvergreenProjectConfig, parse_evergreen_file)
from buildscripts.resmoke_proxy.resmoke_proxy import ResmokeProxyService
from buildscripts.timeouts.timeout_service import (TimeoutParams, TimeoutService, TimeoutSettings)
from buildscripts.timeouts.timeout_service import (TimeoutParams, TimeoutService)
from buildscripts.util.cmdutils import enable_logging
from buildscripts.util.taskname import determine_task_base_name
@ -372,9 +372,6 @@ def main():
options = parser.parse_args()
end_date = datetime.now()
start_date = end_date - HISTORY_LOOKBACK
timeout_override = timedelta(seconds=options.timeout) if options.timeout else None
exec_timeout_override = timedelta(
seconds=options.exec_timeout) if options.exec_timeout else None
@ -389,7 +386,6 @@ def main():
binder.bind(
EvergreenApi,
RetryingEvergreenApi.get_api(config_file=os.path.expanduser(options.evg_api_config)))
binder.bind(TimeoutSettings, TimeoutSettings(start_date=start_date, end_date=end_date))
binder.bind(TimeoutOverrides, timeout_overrides)
binder.bind(EvergreenProjectConfig,
parse_evergreen_file(os.path.expanduser(options.evg_project_config)))

View File

@ -3,7 +3,6 @@
import json
import sys
from collections import namedtuple
from datetime import datetime, timedelta
from statistics import mean
from typing import Dict, List
@ -13,7 +12,8 @@ import structlog
from buildscripts.resmokelib.testing.report import TestInfo, TestReport
from buildscripts.resmokelib.utils import get_task_name_without_suffix
from buildscripts.util.cmdutils import enable_logging
from evergreen import RetryingEvergreenApi, TestStats
from buildscripts.util.teststats import HistoricTaskData, HistoricalTestInformation
LOGGER = structlog.get_logger("buildscripts.resmoke_tests_runtime_validate")
@ -34,17 +34,12 @@ def parse_resmoke_report(report_file: str) -> List[TestInfo]:
return [test_info for test_info in test_report.test_infos if "jstests" in test_info.test_file]
def get_historic_stats(evg_api_config: str, project_id: str, test_files: List[str], task_name: str,
build_variant: str) -> List[TestStats]:
def get_historic_stats(project_id: str, task_name: str,
build_variant: str) -> List[HistoricalTestInformation]:
"""Get historic test stats."""
evg_api = RetryingEvergreenApi.get_api(config_file=evg_api_config)
before_date = datetime.today()
after_date = before_date - timedelta(days=LOOK_BACK_NUM_DAYS)
base_task_name = get_task_name_without_suffix(task_name, build_variant).replace(
BURN_IN_PREFIX, "")
return evg_api.test_stats_by_project(project_id=project_id, after_date=after_date,
before_date=before_date, tests=test_files,
tasks=[base_task_name], variants=[build_variant])
return HistoricTaskData.get_stats_from_s3(project_id, base_task_name, build_variant)
def make_stats_map(stats: List[_TestData]) -> Dict[str, List[float]]:
@ -63,13 +58,10 @@ def make_stats_map(stats: List[_TestData]) -> Dict[str, List[float]]:
@click.command()
@click.option("--resmoke-report-file", type=str, required=True,
help="Location of resmoke's report JSON file.")
@click.option("--evg-api-config", type=str, required=True,
help="Location of evergreen api configuration.")
@click.option("--project-id", type=str, required=True, help="Evergreen project id.")
@click.option("--build-variant", type=str, required=True, help="Evergreen build variant name.")
@click.option("--task-name", type=str, required=True, help="Evergreen task name.")
def main(resmoke_report_file: str, evg_api_config: str, project_id: str, build_variant: str,
task_name: str) -> None:
def main(resmoke_report_file: str, project_id: str, build_variant: str, task_name: str) -> None:
"""Compare resmoke tests runtime with historic stats."""
enable_logging(verbose=False)
@ -79,10 +71,9 @@ def main(resmoke_report_file: str, evg_api_config: str, project_id: str, build_v
for test_info in current_test_infos
])
historic_stats = get_historic_stats(evg_api_config, project_id, list(current_stats_map.keys()),
task_name, build_variant)
historic_stats = get_historic_stats(project_id, task_name, build_variant)
historic_stats_map = make_stats_map([
_TestData(test_stats.test_file, test_stats.avg_duration_pass)
_TestData(test_stats.test_name, test_stats.avg_duration_pass)
for test_stats in historic_stats
])

View File

@ -1,41 +1,42 @@
"""Unit tests for timeout_service.py."""
import random
import unittest
from datetime import datetime, timedelta
from unittest.mock import MagicMock
from unittest.mock import MagicMock, patch
from requests.exceptions import HTTPError
from evergreen import EvergreenApi
import buildscripts.timeouts.timeout_service as under_test
from buildscripts.resmoke_proxy.resmoke_proxy import ResmokeProxyService
from buildscripts.util.teststats import HistoricTaskData
from buildscripts.util.teststats import HistoricTaskData, HistoricTestInfo
# pylint: disable=invalid-name,protected-access
NS = "buildscripts.timeouts.timeout_service"
def build_mock_service(evg_api=None, resmoke_proxy=None):
end_date = datetime.now()
start_date = end_date - timedelta(weeks=2)
timeout_settings = under_test.TimeoutSettings(
end_date=end_date,
start_date=start_date,
)
def ns(relative_name):  # pylint: disable=invalid-name
    """Qualify a name with the namespace of the module under test.

    :param relative_name: Name relative to the namespace held in ``NS``.
    :return: ``NS`` and ``relative_name`` joined by a dot.
    """
    return ".".join((NS, relative_name))
def build_mock_service(resmoke_proxy=None):
return under_test.TimeoutService(
evg_api=evg_api if evg_api else MagicMock(spec_set=EvergreenApi),
resmoke_proxy=resmoke_proxy if resmoke_proxy else MagicMock(spec_set=ResmokeProxyService),
timeout_settings=timeout_settings)
resmoke_proxy=resmoke_proxy if resmoke_proxy else MagicMock(spec_set=ResmokeProxyService))
def tst_stat_mock(file, duration, pass_count):
return MagicMock(test_file=file, avg_duration_pass=duration, num_pass=pass_count)
return MagicMock(test_name=file, avg_duration_pass=duration, num_pass=pass_count, hooks=[])
def tst_runtime_mock(file, duration, pass_count):
    """Build a mock object carrying runtime stats for a single test.

    :param file: Value exposed as the mock's ``test_name`` attribute.
    :param duration: Value exposed as the mock's ``avg_duration_pass`` attribute.
    :param pass_count: Value exposed as the mock's ``num_pass`` attribute.
    :return: A ``MagicMock`` configured with the three attributes above.
    """
    attributes = {
        "test_name": file,
        "avg_duration_pass": duration,
        "num_pass": pass_count,
    }
    return MagicMock(**attributes)
class TestGetTimeoutEstimate(unittest.TestCase):
def test_no_stats_should_return_default_timeout(self):
mock_evg_api = MagicMock(spec_set=EvergreenApi)
mock_evg_api.test_stats_by_project.return_value = []
timeout_service = build_mock_service(evg_api=mock_evg_api)
@patch(ns("HistoricTaskData.from_s3"))
def test_no_stats_should_return_default_timeout(self, from_s3_mock: MagicMock):
timeout_service = build_mock_service()
from_s3_mock.return_value = []
timeout_params = under_test.TimeoutParams(
evg_project="my project",
build_variant="bv",
@ -48,13 +49,17 @@ class TestGetTimeoutEstimate(unittest.TestCase):
self.assertFalse(timeout.is_specified())
def test_a_test_with_missing_history_should_cause_a_default_timeout(self):
mock_evg_api = MagicMock(spec_set=EvergreenApi)
test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]
mock_evg_api.test_stats_by_project.return_value = test_stats
@patch(ns("HistoricTaskData.from_s3"))
def test_a_test_with_missing_history_should_cause_a_default_timeout(
self, from_s3_mock: MagicMock):
test_stats = [
HistoricTestInfo(test_name=f"test_{i}.js", avg_duration=60, num_pass=1, hooks=[])
for i in range(30)
]
from_s3_mock.return_value = HistoricTaskData(test_stats)
mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
mock_resmoke_proxy.list_tests.return_value = ["test_with_no_stats.js"]
timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy)
timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy)
timeout_params = under_test.TimeoutParams(
evg_project="my project",
build_variant="bv",
@ -67,14 +72,19 @@ class TestGetTimeoutEstimate(unittest.TestCase):
self.assertFalse(timeout.is_specified())
def test_a_test_with_zero_runtime_history_should_cause_a_default_timeout(self):
mock_evg_api = MagicMock(spec_set=EvergreenApi)
test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]
test_stats.append(tst_stat_mock("zero.js", 0.0, 1))
mock_evg_api.test_stats_by_project.return_value = test_stats
@patch(ns("HistoricTaskData.from_s3"))
def test_a_test_with_zero_runtime_history_should_cause_a_default_timeout(
self, from_s3_mock: MagicMock):
test_stats = [
HistoricTestInfo(test_name=f"test_{i}.js", avg_duration=60, num_pass=1, hooks=[])
for i in range(30)
]
test_stats.append(
HistoricTestInfo(test_name="zero.js", avg_duration=0.0, num_pass=1, hooks=[]))
from_s3_mock.return_value = HistoricTaskData(test_stats)
mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
mock_resmoke_proxy.list_tests.return_value = [ts.test_file for ts in test_stats]
timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy)
mock_resmoke_proxy.list_tests.return_value = [ts.test_name for ts in test_stats]
timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy)
timeout_params = under_test.TimeoutParams(
evg_project="my project",
build_variant="bv",
@ -87,15 +97,19 @@ class TestGetTimeoutEstimate(unittest.TestCase):
self.assertFalse(timeout.is_specified())
def test_all_tests_with_runtime_history_should_use_custom_timeout(self):
mock_evg_api = MagicMock(spec_set=EvergreenApi)
@patch(ns("HistoricTaskData.from_s3"))
def test_all_tests_with_runtime_history_should_use_custom_timeout(self,
from_s3_mock: MagicMock):
n_tests = 30
test_runtime = 600
test_stats = [tst_stat_mock(f"test_{i}.js", test_runtime, 1) for i in range(n_tests)]
mock_evg_api.test_stats_by_project.return_value = test_stats
test_stats = [
HistoricTestInfo(test_name=f"test_{i}.js", avg_duration=test_runtime, num_pass=1,
hooks=[]) for i in range(n_tests)
]
from_s3_mock.return_value = HistoricTaskData(test_stats)
mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
mock_resmoke_proxy.list_tests.return_value = [ts.test_file for ts in test_stats]
timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy)
mock_resmoke_proxy.list_tests.return_value = [ts.test_name for ts in test_stats]
timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy)
timeout_params = under_test.TimeoutParams(
evg_project="my project",
build_variant="bv",
@ -149,10 +163,10 @@ class TestGetTaskHookOverhead(unittest.TestCase):
class TestLookupHistoricStats(unittest.TestCase):
def test_no_stats_from_evergreen_should_return_none(self):
mock_evg_api = MagicMock(spec_set=EvergreenApi)
mock_evg_api.test_stats_by_project.return_value = []
timeout_service = build_mock_service(evg_api=mock_evg_api)
@patch(ns("HistoricTaskData.from_s3"))
def test_no_stats_from_evergreen_should_return_none(self, from_s3_mock: MagicMock):
from_s3_mock.return_value = None
timeout_service = build_mock_service()
timeout_params = under_test.TimeoutParams(
evg_project="my project",
build_variant="bv",
@ -165,10 +179,10 @@ class TestLookupHistoricStats(unittest.TestCase):
self.assertIsNone(stats)
def test_errors_from_evergreen_should_return_none(self):
mock_evg_api = MagicMock(spec_set=EvergreenApi)
mock_evg_api.test_stats_by_project.side_effect = HTTPError("failed to connect")
timeout_service = build_mock_service(evg_api=mock_evg_api)
@patch(ns("HistoricTaskData.from_s3"))
def test_errors_from_evergreen_should_return_none(self, from_s3_mock: MagicMock):
from_s3_mock.side_effect = HTTPError("failed to connect")
timeout_service = build_mock_service()
timeout_params = under_test.TimeoutParams(
evg_project="my project",
build_variant="bv",
@ -181,11 +195,11 @@ class TestLookupHistoricStats(unittest.TestCase):
self.assertIsNone(stats)
def test_stats_from_evergreen_should_return_the_stats(self):
mock_evg_api = MagicMock(spec_set=EvergreenApi)
@patch(ns("HistoricTaskData.from_s3"))
def test_stats_from_evergreen_should_return_the_stats(self, from_s3_mock: MagicMock):
test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(100)]
mock_evg_api.test_stats_by_project.return_value = test_stats
timeout_service = build_mock_service(evg_api=mock_evg_api)
from_s3_mock.return_value = HistoricTaskData(test_stats)
timeout_service = build_mock_service()
timeout_params = under_test.TimeoutParams(
evg_project="my project",
build_variant="bv",

View File

@ -78,7 +78,7 @@ class TestHistoricTaskData(unittest.TestCase):
@staticmethod
def _make_evg_result(test_file="dir/test1.js", num_pass=0, duration=0):
return Mock(
test_file=test_file,
test_name=test_file,
task_name="task1",
variant="variant1",
distro="distro1",

View File

@ -1,5 +1,4 @@
"""Service for determining task timeouts."""
from datetime import datetime
from typing import Any, Dict, NamedTuple, Optional
import inject
@ -31,29 +30,17 @@ class TimeoutParams(NamedTuple):
is_asan: bool
class TimeoutSettings(NamedTuple):
"""Settings for determining timeouts."""
start_date: datetime
end_date: datetime
class TimeoutService:
"""A service for determining task timeouts."""
@inject.autoparams()
def __init__(self, evg_api: EvergreenApi, resmoke_proxy: ResmokeProxyService,
timeout_settings: TimeoutSettings) -> None:
def __init__(self, resmoke_proxy: ResmokeProxyService) -> None:
"""
Initialize the service.
:param evg_api: Evergreen API client.
:param resmoke_proxy: Proxy to query resmoke.
:param timeout_settings: Settings for how timeouts are calculated.
"""
self.evg_api = evg_api
self.resmoke_proxy = resmoke_proxy
self.timeout_settings = timeout_settings
def get_timeout_estimate(self, timeout_params: TimeoutParams) -> TimeoutEstimate:
"""
@ -137,10 +124,8 @@ class TimeoutService:
:return: Historic test results if they exist.
"""
try:
evg_stats = HistoricTaskData.from_evg(
self.evg_api, timeout_params.evg_project, self.timeout_settings.start_date,
self.timeout_settings.end_date, timeout_params.task_name,
timeout_params.build_variant)
evg_stats = HistoricTaskData.from_s3(
timeout_params.evg_project, timeout_params.task_name, timeout_params.build_variant)
if not evg_stats:
LOGGER.warning("No historic runtime information available")
return None

View File

@ -1,15 +1,31 @@
"""Utility to support parsing a TestStat."""
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime
from itertools import chain
from typing import NamedTuple, List, Callable, Optional
from evergreen import EvergreenApi, TestStats
import requests
from requests.adapters import HTTPAdapter, Retry
from buildscripts.util.testname import split_test_hook_name, is_resmoke_hook, get_short_name_from_test_file
TASK_LEVEL_HOOKS = {"CleanEveryN"}
TESTS_STATS_S3_LOCATION = "https://mongo-test-stats.s3.amazonaws.com"
class HistoricalTestInformation(NamedTuple):
    """
    Historical runtime record for a single test.

    Fields, in positional order:

    test_name: Name of the test.
    num_pass: Number of times the test has passed.
    num_fail: Number of times the test has failed.
    avg_duration_pass: Average runtime of the runs of the test that passed.
    """

    # Identifier of the test this record describes.
    test_name: str
    # Count of passing runs.
    num_pass: int
    # Count of failing runs.
    num_fail: int
    # Mean runtime over the passing runs only.
    avg_duration_pass: float
class TestRuntime(NamedTuple):
@ -74,9 +90,9 @@ class HistoricHookInfo(NamedTuple):
avg_duration: float
@classmethod
def from_test_stats(cls, test_stats: TestStats) -> "HistoricHookInfo":
def from_test_stats(cls, test_stats: HistoricalTestInformation) -> "HistoricHookInfo":
"""Create an instance from a test_stats object."""
return cls(hook_id=test_stats.test_file, num_pass=test_stats.num_pass,
return cls(hook_id=test_stats.test_name, num_pass=test_stats.num_pass,
avg_duration=test_stats.avg_duration_pass)
def test_name(self) -> str:
@ -101,10 +117,10 @@ class HistoricTestInfo(NamedTuple):
hooks: List[HistoricHookInfo]
@classmethod
def from_test_stats(cls, test_stats: TestStats,
def from_test_stats(cls, test_stats: HistoricalTestInformation,
hooks: List[HistoricHookInfo]) -> "HistoricTestInfo":
"""Create an instance from a test_stats object."""
return cls(test_name=test_stats.test_file, num_pass=test_stats.num_pass,
return cls(test_name=test_stats.test_name, num_pass=test_stats.num_pass,
avg_duration=test_stats.avg_duration_pass, hooks=hooks)
def normalized_test_name(self) -> str:
@ -137,45 +153,56 @@ class HistoricTaskData(object):
"""Initialize the TestStats with raw results from the Evergreen API."""
self.historic_test_results = historic_test_results
@classmethod
def from_evg(cls, evg_api: EvergreenApi, project: str, start_date: datetime, end_date: datetime,
task: str, variant: str) -> "HistoricTaskData":
@staticmethod
def get_stats_from_s3(project: str, task: str, variant: str) -> List[HistoricalTestInformation]:
    """
    Retrieve test stats from s3 for a given task.

    :param project: Project to query.
    :param task: Task to query.
    :param variant: Build variant to query.
    :return: A list of the Test stats for the specified task, or an empty list when the
        response body cannot be decoded as JSON.
    """
    # Retry up to 5 times when the server answers with 502, 503 or 504.
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])

    # Use the session as a context manager so its pooled connections are released.
    with requests.Session() as session:
        session.mount('https://', HTTPAdapter(max_retries=retries))
        response = session.get(f"{TESTS_STATS_S3_LOCATION}/{project}/{variant}/{task}")

    try:
        data = response.json()
    except ValueError:
        # The JSON decode errors raised by `Response.json()` are ValueError subclasses.
        # NOTE(review): presumably a task with no recorded history yields a non-JSON
        # error body here -- confirm against the bucket's behavior.
        return []

    return [HistoricalTestInformation(**item) for item in data]
@classmethod
def from_s3(cls, project: str, task: str, variant: str) -> "HistoricTaskData":
"""
Retrieve test stats from s3 for a given task.
:param evg_api: Evergreen API client.
:param project: Project to query.
:param start_date: Start date to query.
:param end_date: End date to query.
:param task: Task to query.
:param variant: Build variant to query.
:return: Test stats for the specified task.
"""
days = (end_date - start_date).days
historic_stats = evg_api.test_stats_by_project(
project, after_date=start_date, before_date=end_date, tasks=[task], variants=[variant],
group_by="test", group_num_days=days)
return cls.from_stats_list(historic_stats)
historical_test_data = cls.get_stats_from_s3(project, task, variant)
return cls.from_stats_list(historical_test_data)
@classmethod
def from_stats_list(cls, historic_stats: List[TestStats]) -> "HistoricTaskData":
def from_stats_list(
cls, historical_test_data: List[HistoricalTestInformation]) -> "HistoricTaskData":
"""
Build historic task data from a list of historic stats.
:param historic_stats: List of historic stats to build from.
:param historical_test_data: A list of information about the runtime of a test.
:return: Historic task data from the list of stats.
"""
hooks = defaultdict(list)
for hook in [stat for stat in historic_stats if is_resmoke_hook(stat.test_file)]:
for hook in [stat for stat in historical_test_data if is_resmoke_hook(stat.test_name)]:
historical_hook = HistoricHookInfo.from_test_stats(hook)
hooks[historical_hook.test_name()].append(historical_hook)
return cls([
HistoricTestInfo.from_test_stats(stat,
hooks[get_short_name_from_test_file(stat.test_file)])
for stat in historic_stats if not is_resmoke_hook(stat.test_file)
hooks[get_short_name_from_test_file(stat.test_name)])
for stat in historical_test_data if not is_resmoke_hook(stat.test_name)
])
def get_tests_runtimes(self) -> List[TestRuntime]:

View File

@ -9,7 +9,6 @@ set -o errexit
activate_venv
$python buildscripts/resmoke_tests_runtime_validate.py \
--resmoke-report-file ./report.json \
--evg-api-config ./.evergreen.yml \
--project-id ${project_id} \
--build-variant ${build_variant} \
--task-name ${task_name}