mirror of
https://github.com/mongodb/mongo.git
synced 2024-12-01 09:32:32 +01:00
120 lines
4.2 KiB
Python
120 lines
4.2 KiB
Python
|
#!/usr/bin/env python3
|
||
|
"""Powercycle tasks sentinel.
|
||
|
|
||
|
Error out when any powercycle task on the same buildvariant runs for more than 2 hours.
|
||
|
"""
|
||
|
import logging
|
||
|
import os
|
||
|
import sys
|
||
|
import time
|
||
|
from datetime import datetime, timezone
|
||
|
from typing import List
|
||
|
|
||
|
import click
|
||
|
import structlog
|
||
|
from evergreen import RetryingEvergreenApi, EvergreenApi
|
||
|
|
||
|
from buildscripts.util.read_config import read_config_file
|
||
|
|
||
|
# Module-level structlog logger, named after this module.
LOGGER = structlog.getLogger(__name__)
|
||
|
|
||
|
# Base URL of the Evergreen web UI; task links are built on top of it.
EVERGREEN_HOST = "https://evergreen.mongodb.com"

# Candidate Evergreen config files, listed in lookup order.
EVERGREEN_CONFIG_LOCATIONS = (
    # Current working directory: the usual place on machines in Evergreen.
    os.path.join(os.getcwd(), ".evergreen.yml"),
    # User's home directory: the usual place on local machines.
    os.path.expanduser(os.path.join("~", ".evergreen.yml")),
)

# Running time (2 hours) above which a powercycle task is reported as long running.
POWERCYCLE_TASK_EXEC_TIMEOUT_SECS = 60 * 60 * 2

# Pause (5 minutes) between two consecutive polls of Evergreen.
WATCH_INTERVAL_SECS = 60 * 5
|
||
|
|
||
|
|
||
|
def get_evergreen_api() -> EvergreenApi:
    """Build an Evergreen API client from the first config file that exists.

    The paths in EVERGREEN_CONFIG_LOCATIONS are probed in order. When none of them
    is an existing file, an error is logged and the process exits with status 1.

    :return: Evergreen API client created from the config file that was found.
    """
    config_file = next(filter(os.path.isfile, EVERGREEN_CONFIG_LOCATIONS), None)
    if config_file is not None:
        return RetryingEvergreenApi.get_api(config_file=config_file)

    LOGGER.error("Evergreen config not found in locations.", locations=EVERGREEN_CONFIG_LOCATIONS)
    sys.exit(1)
|
||
|
|
||
|
|
||
|
def watch_tasks(task_ids: List[str], evg_api: EvergreenApi, watch_interval_secs: int) -> List[str]:
    """Poll the given tasks until each one has finished or exceeded the exec timeout.

    :param task_ids: Ids of the tasks to watch; the passed list is copied, not modified.
    :param evg_api: Evergreen API client used to fetch the current state of each task.
    :param watch_interval_secs: Seconds to sleep between two polling rounds.
    :return: Ids of the tasks seen running for more than POWERCYCLE_TASK_EXEC_TIMEOUT_SECS.
    """
    pending_ids = list(task_ids)
    overdue_ids = []

    while pending_ids:
        LOGGER.info("Looking if powercycle tasks are still running on the current buildvariant.")
        # All pending tasks are fetched up front, so removing ids below never
        # disturbs the sequence that is being iterated.
        fetched_tasks = [evg_api.task_by_id(pending_id) for pending_id in pending_ids]

        for task in fetched_tasks:
            if task.finish_time:
                # Finished tasks need no further watching.
                pending_ids.remove(task.task_id)
                continue
            if not task.start_time:
                # No start time recorded: nothing to measure yet, keep watching.
                continue

            running_secs = (datetime.now(timezone.utc) - task.start_time).total_seconds()
            if running_secs > POWERCYCLE_TASK_EXEC_TIMEOUT_SECS:
                overdue_ids.append(task.task_id)
                pending_ids.remove(task.task_id)

        # Only pause when there is still something left to poll.
        if pending_ids:
            time.sleep(watch_interval_secs)

    return overdue_ids
|
||
|
|
||
|
|
||
|
def get_links(task_ids: List[str]) -> str:
    """Build the Evergreen task URLs for the given task ids, one URL per line.

    :param task_ids: Ids of the tasks to link to.
    :return: Newline-delimited task URLs; an empty string when no ids are given.
    """
    task_url_prefix = f"{EVERGREEN_HOST}/task/"
    return "\n".join(f"{task_url_prefix}{task_id}" for task_id in task_ids)
|
||
|
|
||
|
|
||
|
@click.command()
@click.argument("expansions_file", type=str, default="expansions.yml")
def main(expansions_file: str = "expansions.yml") -> None:
    """Implementation."""
    # NOTE: the docstring above is kept verbatim because click renders it as the
    # command's --help text.
    #
    # Flow of the sentinel:
    #   1. Read build_id, task_id and gen_task from the expansions file.
    #   2. Wait until the generator task of the build is no longer active.
    #   3. Watch every other non-display powercycle task of the build.
    #   4. Exit with status 1 if any of them ran longer than
    #      POWERCYCLE_TASK_EXEC_TIMEOUT_SECS.
    #
    # :param expansions_file: Path to the expansions yaml file to read.

    logging.basicConfig(
        format="[%(levelname)s] %(message)s",
        level=logging.INFO,
        stream=sys.stdout,
    )
    structlog.configure(logger_factory=structlog.stdlib.LoggerFactory())

    expansions = read_config_file(expansions_file)
    build_id = expansions["build_id"]
    current_task_id = expansions["task_id"]
    gen_task_name = expansions["gen_task"]

    evg_api = get_evergreen_api()

    build_tasks = evg_api.tasks_by_build(build_id)
    # Take the first task whose id contains the generator task name. A missing
    # generator task is reported explicitly instead of failing with a bare
    # IndexError on an empty list.
    gen_task_id = next(
        (task.task_id for task in build_tasks if gen_task_name in task.task_id), None)
    if gen_task_id is None:
        LOGGER.error("Generator task not found in the build.", gen_task=gen_task_name,
                     build_id=build_id)
        sys.exit(1)
    gen_task_url = f"{EVERGREEN_HOST}/task/{gen_task_id}"

    # Powercycle tasks only exist once the generator task is done, so poll until then.
    while evg_api.task_by_id(gen_task_id).is_active():
        LOGGER.info(
            f"Waiting for '{gen_task_name}' task to generate powercycle tasks:\n{gen_task_url}")
        time.sleep(WATCH_INTERVAL_SECS)

    # Re-fetch the build's tasks so that the freshly generated ones are included.
    build_tasks = evg_api.tasks_by_build(build_id)
    # Watch real (non-display) powercycle tasks, excluding this task and the generator.
    powercycle_task_ids = [
        task.task_id for task in build_tasks
        if not task.display_only and task.task_id != current_task_id and task.task_id != gen_task_id
        and "powercycle" in task.task_id
    ]
    LOGGER.info(f"Watching powercycle tasks:\n{get_links(powercycle_task_ids)}")

    long_running_task_ids = watch_tasks(powercycle_task_ids, evg_api, WATCH_INTERVAL_SECS)
    if long_running_task_ids:
        LOGGER.error(
            f"Found powercycle tasks that are running for more than {POWERCYCLE_TASK_EXEC_TIMEOUT_SECS} "
            f"seconds and most likely something is going wrong in those tasks:\n{get_links(long_running_task_ids)}"
        )
        LOGGER.error(
            "Hopefully hosts from the tasks are still in run at the time you are seeing this "
            "and the Build team is able to check them to diagnose the issue.")
        # A non-zero exit status makes the sentinel task itself fail.
        sys.exit(1)
|
||
|
|
||
|
|
||
|
# Run the sentinel only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|