#!/usr/bin/env python3
"""Runs clang-tidy in parallel and combines the results for easier viewing."""

import argparse
import datetime
import hashlib
import json
import locale
import math
import multiprocessing
import os
import re
import subprocess
import sys
import time
from concurrent import futures
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import yaml

from clang_tidy_vscode import CHECKS_SO
from simple_report import make_report, put_report, try_combine_reports


def _clang_tidy_executor(
    clang_tidy_filename: Path,
    clang_tidy_binary: str,
    clang_tidy_cfg: Dict[str, Any],
    output_dir: str,
    show_stdout: bool,
    mongo_check_module: str = "",
    compile_commands: str = "compile_commands.json",
) -> Tuple[str, Optional[str]]:
    """Run clang-tidy on a single file and capture its output.

    Returns a tuple of (stdout text, path to the exported fixes YAML, or None
    if the file passed cleanly).
    """
    clang_tidy_parent_dir = output_dir / clang_tidy_filename.parent
    os.makedirs(clang_tidy_parent_dir, exist_ok=True)

    output_filename_base = clang_tidy_parent_dir / clang_tidy_filename.name
    output_filename_fixes = output_filename_base.with_suffix(".yml")

    if mongo_check_module:
        load_module_option = ["-load", mongo_check_module]
    else:
        load_module_option = []

    clang_tidy_command = [
        clang_tidy_binary,
        *load_module_option,
        "-p",
        os.path.dirname(compile_commands),
        clang_tidy_filename,
        f"-export-fixes={output_filename_fixes}",
        f"-config={json.dumps(clang_tidy_cfg)}",
        "--warnings-as-errors=*,-clang-diagnostic-builtin-macro-redefined",
    ]
    proc = subprocess.run(clang_tidy_command, capture_output=True, check=False)
    files_to_parse = None
    if proc.returncode != 0:
        output_filename_out = output_filename_base.with_suffix(".fail")
        files_to_parse = output_filename_fixes
        if not show_stdout:
            print(
                f"Running clang-tidy on {clang_tidy_filename} had errors, see {output_filename_out}"
            )
        else:
            print(f"Running clang-tidy on {clang_tidy_filename}")
            print(f"{proc.stderr.decode(locale.getpreferredencoding())}")
            print(f"{proc.stdout.decode(locale.getpreferredencoding())}")
    else:
        output_filename_out = output_filename_base.with_suffix(".pass")
        if not show_stdout:
            print(f"Running clang-tidy on {clang_tidy_filename} had no errors")

    with open(output_filename_out, "wb") as output:
        output.write(proc.stderr)
        output.write(proc.stdout)
    return proc.stdout.decode(locale.getpreferredencoding()), files_to_parse
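
# For reference, the export-fixes YAML that clang-tidy writes (and that
# _combine_errors parses below) is shaped roughly like this. The field names
# come from the code's own accesses; the values are illustrative only:
#
#   MainSourceFile: src/mongo/db/foo.cpp
#   Diagnostics:
#     - DiagnosticName: bugprone-use-after-move
#       DiagnosticMessage:
#         Message: "'x' used after it was moved"
#         FilePath: src/mongo/db/foo.cpp
#         FileOffset: 1234
#         Replacements: []
#       Notes: [...]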


def _combine_errors(fixes_filename: str, files_to_parse: List[str]) -> int:
    """Merge the per-file fixes YAML into one JSON report and count failures."""
    failed_files = 0
    all_fixes = {}

    # Loop over files_to_parse and count the number of failed files.
    for item in files_to_parse:
        if item is None:
            continue
        failed_files += 1

        # Read the YAML fixes for the file to combine them with the other suggested fixes.
        with open(item) as input_yml:
            fixes = yaml.safe_load(input_yml)

        for fix in fixes["Diagnostics"]:
            fix_msg = None
            if "Notes" in fix:
                fix_msg = fix["Notes"][0]
                if len(fix["Notes"]) > 1:
                    print(f'Warning: this script may be missing values in [{fix["Notes"]}]')
            else:
                fix_msg = fix["DiagnosticMessage"]
            fix_data = (
                all_fixes.setdefault(fix["DiagnosticName"], {})
                .setdefault(fix_msg.get("FilePath", "FilePath Not Found"), {})
                .setdefault(
                    str(fix_msg.get("FileOffset", "FileOffset Not Found")),
                    {
                        "replacements": fix_msg.get("Replacements", "Replacements not found"),
                        "message": fix_msg.get("Message", "Message not found"),
                        "count": 0,
                        "source_files": [],
                    },
                )
            )
            fix_data["count"] += 1
            fix_data["source_files"].append(fixes["MainSourceFile"])
            file_path = fix_msg.get("FilePath")
            if file_path and os.path.exists(file_path):
                with open(file_path, "rb") as file_to_hash:
                    all_fixes[fix["DiagnosticName"]][file_path]["md5"] = hashlib.md5(
                        file_to_hash.read()
                    ).hexdigest()

    with open(fixes_filename, "w") as files_file:
        json.dump(all_fixes, files_file, indent=4, sort_keys=True)

    return failed_files
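
# The combined fixes JSON written above nests diagnostics by check name, file
# path, and file offset; a purely illustrative entry looks like:
#
#   {
#     "bugprone-use-after-move": {
#       "src/mongo/db/foo.cpp": {
#         "1234": {
#           "replacements": [...],
#           "message": "'x' used after it was moved",
#           "count": 2,
#           "source_files": ["src/mongo/db/foo.cpp"]
#         },
#         "md5": "<hash of the file's current contents>"
#       }
#     }
#   }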


def __dedup_errors(clang_tidy_errors_threads: List[str]) -> str:
    """Collapse identical multi-line error messages gathered from all threads."""
    unique_single_errors = set()
    for errs in clang_tidy_errors_threads:
        if errs:
            lines = errs.splitlines()
            single_error_start_line = 0
            for i, line in enumerate(lines):
                if line:
                    # The first line of a single error message looks like:
                    #   ......./d_concurrency.h:175:13: error: .........
                    # so try to match :lineNumber:columnNumber:
                    matched_regex = re.match("(.+:[0-9]+:[0-9]+:)", line)

                    # Collect a full single error message when we find another
                    # match or reach the last line of the text.
                    if matched_regex and i != single_error_start_line:
                        unique_single_errors.add(tuple(lines[single_error_start_line:i]))
                        single_error_start_line = i
                    elif i == len(lines) - 1:
                        unique_single_errors.add(tuple(lines[single_error_start_line : i + 1]))

    unique_single_error_flatten = [item for sublist in unique_single_errors for item in sublist]
    return os.linesep.join(unique_single_error_flatten)
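
# For example, if two worker threads both report
#   src/mongo/db/foo.h:175:13: error: ...
# followed by the same note lines, the set above keeps only a single copy of
# that whole multi-line message.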


def _run_tidy(args, parser_defaults):
    """Run clang-tidy over the compile_commands entries and collect the results."""
    clang_tidy_binary = f"/opt/mongodbtoolchain/{args.clang_tidy_toolchain}/bin/clang-tidy"

    if os.path.exists(args.check_module):
        mongo_tidy_check_module = args.check_module
    else:
        mongo_tidy_check_module = ""

    if os.path.exists(args.compile_commands):
        with open(args.compile_commands) as compile_commands:
            compile_commands = sorted(json.load(compile_commands), key=lambda x: x["file"])
    else:
        if args.compile_commands == parser_defaults.compile_commands:
            print(
                f"Could not find compile commands: '{args.compile_commands}', to generate it, use the build command:\n\n"
                + "python3 buildscripts/scons.py --build-profile=compiledb compiledb\n"
            )
        else:
            print(f"Could not find compile commands: {args.compile_commands}")
        sys.exit(1)

    if os.path.exists(args.clang_tidy_cfg):
        with open(args.clang_tidy_cfg) as clang_tidy_cfg:
            clang_tidy_cfg = yaml.safe_load(clang_tidy_cfg)
    else:
        if args.clang_tidy_cfg == parser_defaults.clang_tidy_cfg:
            print(
                f"Could not find config file: '{args.clang_tidy_cfg}', to generate it, use the build command:\n\n"
                + "python3 buildscripts/scons.py --build-profile=compiledb compiledb\n"
            )
        else:
            print(f"Could not find config file: {args.clang_tidy_cfg}")
        sys.exit(1)

    if args.split_jobs < 0:
        print(f"--split-jobs: '{args.split_jobs}' must be a positive integer.")
        sys.exit(1)

    if args.split_jobs != 0:
        if args.split < 1 or args.split > args.split_jobs:
            print(
                f"--split: '{args.split}' must be a value between 1 and --split-jobs: '{args.split_jobs}'"
            )
            sys.exit(1)

    files_to_tidy: List[Path] = list()
    files_to_parse = list()
    filtered_compile_commands = []
    for file_doc in compile_commands:
        # A few special cases of files to ignore
        if not file_doc["file"].startswith("src/mongo/"):
            continue

        # Don't run clang_tidy on the streams/third_party code.
        if file_doc["file"].startswith("src/mongo/db/modules/enterprise/src/streams/third_party"):
            continue

        # TODO SERVER-49884 Remove this when we no longer check in generated Bison.
        if file_doc["file"].endswith("/parser_gen.cpp"):
            continue
        filtered_compile_commands.append(file_doc)

    if args.split_jobs != 0:
        original_compile_commands = filtered_compile_commands.copy()
        total = len(filtered_compile_commands)
        extra = total % args.split_jobs
        # Equivalent to math.ceil(total / args.split_jobs).
        chunk_size = int(total / args.split_jobs) + math.ceil(extra / args.split_jobs)
        chunks = [
            filtered_compile_commands[i : i + chunk_size]
            for i in range(0, len(filtered_compile_commands), chunk_size)
        ]
        # Verify we aren't silently forgetting anything.
        for chunk in chunks:
            for file_doc in chunk:
                original_compile_commands.remove(file_doc)
        if len(original_compile_commands) != 0:
            raise Exception(
                "Total compile_commands different from sum of splits! This means something could silently be ignored!"
            )
        filtered_compile_commands = chunks[args.split - 1]
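
    # Worked example: with 10 files and --split-jobs=3, chunk_size is
    # int(10 / 3) + ceil(1 / 3) = 4, so the chunks hold 4, 4, and 2 files and
    # --split selects one of them (1-indexed).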

    files_to_tidy = [Path(file_doc["file"]) for file_doc in filtered_compile_commands]

    total_jobs = len(files_to_tidy)
    workers = args.threads

    clang_tidy_errors_futures: List[str] = []
    clang_tidy_executor_futures: List[futures.Future] = []

    # total completed tasks
    tasks_completed = 0

    with futures.ThreadPoolExecutor(max_workers=workers) as executor:
        start_time = time.time()

        # submit all futures
        for clang_tidy_filename in files_to_tidy:
            clang_tidy_executor_futures.append(
                executor.submit(
                    _clang_tidy_executor,
                    clang_tidy_filename,
                    clang_tidy_binary,
                    clang_tidy_cfg,
                    args.output_dir,
                    args.show_stdout,
                    mongo_tidy_check_module,
                    compile_commands=args.compile_commands,
                )
            )

        for future in futures.as_completed(clang_tidy_executor_futures):
            stdout_text, fixes_file = future.result()
            clang_tidy_errors_futures.append(stdout_text)
            files_to_parse.append(fixes_file)
            tasks_completed += 1
            pretty_time_duration = str(datetime.timedelta(seconds=time.time() - start_time))
            print(
                f" The number of jobs completed is {tasks_completed}/{total_jobs}. Duration {pretty_time_duration}"
            )

    return clang_tidy_errors_futures, files_to_parse


def main():
    """Execute main entry point."""

    parser = argparse.ArgumentParser(description="Run multithreaded clang-tidy")

    parser.add_argument(
        "-t",
        "--threads",
        type=int,
        default=multiprocessing.cpu_count(),
        help="Run with a specific number of threads",
    )
    parser.add_argument(
        "-d",
        "--output-dir",
        type=str,
        default="clang_tidy_fixes",
        help="Directory to write all clang-tidy output to",
    )
    parser.add_argument(
        "-o",
        "--fixes-file",
        type=str,
        default="clang_tidy_fixes.json",
        help="Report json file to write combined fixes to",
    )
    parser.add_argument(
        "-c",
        "--compile-commands",
        type=str,
        default="compile_commands.json",
        help="compile_commands.json file to use to find the files to tidy",
    )
    parser.add_argument(
        "--split-jobs",
        type=int,
        default=0,
        help="The total number of splits if splitting the jobs across multiple tasks. 0 means don't split.",
    )
    parser.add_argument(
        "--split",
        type=int,
        default=1,
        help="The interval to run out of the total number of --split-jobs. Must be a value between 1 and the --split-jobs value.",
    )
    # Note: argparse's type=bool treats any non-empty string as True, so only
    # an empty value (e.g. -q "") turns this off.
    parser.add_argument(
        "-q", "--show-stdout", type=bool, default=True, help="Log errors to console"
    )
    parser.add_argument(
        "-l", "--log-file", type=str, default="clang_tidy", help="clang tidy log from evergreen"
    )
    parser.add_argument(
        "--only-process-fixes",
        action="store_true",
        help="Skip tidy and process the fixes directory to generate a fixes file. Use in conjunction with -d.",
    )
    parser.add_argument(
        "--disable-reporting",
        action="store_true",
        default=False,
        help="Disable generating the report file for evergreen perf.send",
    )
    parser.add_argument(
        "-m",
        "--check-module",
        type=str,
        default=CHECKS_SO,
        help="Path to load the custom mongo checks module.",
    )
    # TODO: Is there some way to get this without hardcoding this much?
    parser.add_argument("-y", "--clang-tidy-toolchain", type=str, default="v4")
    parser.add_argument("-f", "--clang-tidy-cfg", type=str, default=".clang-tidy")
    args = parser.parse_args()
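
    # Illustrative invocation for one slice of a four-way split (script name
    # and values are examples only):
    #   python3 clang_tidy.py --split-jobs 4 --split 2 -c compile_commands.json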

    if args.only_process_fixes:
        if not os.path.isdir(args.output_dir):
            print(f"Error: {args.output_dir} is not a valid directory.")
            sys.exit(3)
        # Collect every exported fixes YAML from a previous run.
        find_cmd = ["find", args.output_dir, "-type", "f", "-name", "*.yml"]
        find_output = subprocess.Popen(find_cmd, stdout=subprocess.PIPE)
        files_to_parse = []
        # The pipe yields bytes, so the sentinel must be b"" rather than "".
        for line in iter(find_output.stdout.readline, b""):
            if not line:
                break
            files_to_parse.append(str(line.rstrip().decode("utf-8")))
    else:
        parser_defaults = parser.parse_args([])
        clang_tidy_errors_futures, files_to_parse = _run_tidy(args, parser_defaults)

    failed_files = _combine_errors(Path(args.output_dir, args.fixes_file), files_to_parse)

    if not args.only_process_fixes:
        # Zip up all the files for upload
        subprocess.run(["tar", "-czvf", args.output_dir + ".tgz", args.output_dir], check=False)

        # Create report and dump to report.json
        if not args.disable_reporting:
            error_file_contents = __dedup_errors(clang_tidy_errors_futures)
            report = make_report(args.log_file, error_file_contents, 1 if failed_files > 0 else 0)
            try_combine_reports(report)
            put_report(report)

    return failed_files
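
# The script's exit status is the number of files that failed clang-tidy, so a
# zero exit means a clean run.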


if __name__ == "__main__":
    sys.exit(main())