From 1e5719a663d5b1703ad588dda4fccd763c7d3e99 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 18 Feb 2024 21:06:39 +0100 Subject: [PATCH] gh-115122: Add --bisect option to regrtest (#115123) * test.bisect_cmd now exit with code 0 on success, and code 1 on failure. Before, it was the opposite. * test.bisect_cmd now runs the test worker process with -X faulthandler. * regrtest RunTests: Add create_python_cmd() and bisect_cmd() methods. --- Lib/test/bisect_cmd.py | 13 +++-- Lib/test/libregrtest/cmdline.py | 2 + Lib/test/libregrtest/main.py | 58 ++++++++++++++++++- Lib/test/libregrtest/results.py | 13 +++-- Lib/test/libregrtest/runtests.py | 50 ++++++++++++++++ Lib/test/libregrtest/worker.py | 18 +----- Lib/test/test_regrtest.py | 52 ++++++++++++++++- ...-02-18-14-20-52.gh-issue-115122.3rGNo9.rst | 2 + 8 files changed, 178 insertions(+), 30 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-02-18-14-20-52.gh-issue-115122.3rGNo9.rst diff --git a/Lib/test/bisect_cmd.py b/Lib/test/bisect_cmd.py index 5cb804bd469..aee2e8ac120 100755 --- a/Lib/test/bisect_cmd.py +++ b/Lib/test/bisect_cmd.py @@ -51,6 +51,7 @@ def python_cmd(): cmd = [sys.executable] cmd.extend(subprocess._args_from_interpreter_flags()) cmd.extend(subprocess._optim_args_from_interpreter_flags()) + cmd.extend(('-X', 'faulthandler')) return cmd @@ -77,9 +78,13 @@ def run_tests(args, tests, huntrleaks=None): write_tests(tmp, tests) cmd = python_cmd() - cmd.extend(['-m', 'test', '--matchfile', tmp]) + cmd.extend(['-u', '-m', 'test', '--matchfile', tmp]) cmd.extend(args.test_args) print("+ %s" % format_shell_args(cmd)) + + sys.stdout.flush() + sys.stderr.flush() + proc = subprocess.run(cmd) return proc.returncode finally: @@ -137,8 +142,8 @@ def main(): ntest = max(ntest // 2, 1) subtests = random.sample(tests, ntest) - print("[+] Iteration %s: run %s tests/%s" - % (iteration, len(subtests), len(tests))) + print(f"[+] Iteration {iteration}/{args.max_iter}: " + f"run {len(subtests)} tests/{len(tests)}") print() exitcode = run_tests(args, subtests) @@ -170,10 +175,10 @@ def main(): if len(tests) <= args.max_tests: print("Bisection completed in %s iterations and %s" % (iteration, datetime.timedelta(seconds=dt))) - sys.exit(1) else: print("Bisection failed after %s iterations and %s" % (iteration, datetime.timedelta(seconds=dt))) + sys.exit(1) if __name__ == "__main__": diff --git a/Lib/test/libregrtest/cmdline.py b/Lib/test/libregrtest/cmdline.py index 0053bce4292..608b12bb6f2 100644 --- a/Lib/test/libregrtest/cmdline.py +++ b/Lib/test/libregrtest/cmdline.py @@ -347,6 +347,8 @@ def _create_parser(): help='override the working directory for the test run') group.add_argument('--cleanup', action='store_true', help='remove old test_python_* directories') + group.add_argument('--bisect', action='store_true', + help='if some tests fail, run test.bisect_cmd on them') group.add_argument('--dont-add-python-opts', dest='_add_python_opts', action='store_false', help="internal option, don't use it") diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index b24c1b92054..6fbe3d00981 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -7,8 +7,7 @@ import sysconfig import time import trace -from test import support -from test.support import os_helper, MS_WINDOWS +from test.support import os_helper, MS_WINDOWS, flush_std_streams from .cmdline import _parse_args, Namespace from .findtests import findtests, split_test_packages, list_cases @@ -73,6 +72,7 @@ class Regrtest: self.want_cleanup: bool = ns.cleanup self.want_rerun: bool = ns.rerun self.want_run_leaks: bool = ns.runleaks + self.want_bisect: bool = ns.bisect self.ci_mode: bool = (ns.fast_ci or ns.slow_ci) self.want_add_python_opts: bool = (_add_python_opts @@ -273,6 +273,55 @@ class Regrtest: self.display_result(rerun_runtests) + def _run_bisect(self, runtests: RunTests, test: str, progress: str) -> bool: + print() + title = f"Bisect {test}" + if progress: + title = f"{title} ({progress})" + print(title) + print("#" * len(title)) + print() + + cmd = runtests.create_python_cmd() + cmd.extend([ + "-u", "-m", "test.bisect_cmd", + # Limit to 25 iterations (instead of 100) to not abuse CI resources + "--max-iter", "25", + "-v", + # runtests.match_tests is not used (yet) for bisect_cmd -i arg + ]) + cmd.extend(runtests.bisect_cmd_args()) + cmd.append(test) + print("+", shlex.join(cmd), flush=True) + + flush_std_streams() + + import subprocess + proc = subprocess.run(cmd, timeout=runtests.timeout) + exitcode = proc.returncode + + title = f"{title}: exit code {exitcode}" + print(title) + print("#" * len(title)) + print(flush=True) + + if exitcode: + print(f"Bisect failed with exit code {exitcode}") + return False + + return True + + def run_bisect(self, runtests: RunTests) -> None: + tests, _ = self.results.prepare_rerun(clear=False) + + for index, name in enumerate(tests, 1): + if len(tests) > 1: + progress = f"{index}/{len(tests)}" + else: + progress = "" + if not self._run_bisect(runtests, name, progress): + return + def display_result(self, runtests): # If running the test suite for PGO then no one cares about results. if runtests.pgo: @@ -466,7 +515,7 @@ class Regrtest: setup_process() - if self.hunt_refleak and not self.num_workers: + if (runtests.hunt_refleak is not None) and (not self.num_workers): # gh-109739: WindowsLoadTracker thread interfers with refleak check use_load_tracker = False else: @@ -486,6 +535,9 @@ class Regrtest: if self.want_rerun and self.results.need_rerun(): self.rerun_failed_tests(runtests) + + if self.want_bisect and self.results.need_rerun(): + self.run_bisect(runtests) finally: if use_load_tracker: self.logger.stop_load_tracker() diff --git a/Lib/test/libregrtest/results.py b/Lib/test/libregrtest/results.py index a41ea8aba02..85c82052eae 100644 --- a/Lib/test/libregrtest/results.py +++ b/Lib/test/libregrtest/results.py @@ -138,7 +138,7 @@ class TestResults: def need_rerun(self): return bool(self.rerun_results) - def prepare_rerun(self) -> tuple[TestTuple, FilterDict]: + def prepare_rerun(self, *, clear: bool = True) -> tuple[TestTuple, FilterDict]: tests: TestList = [] match_tests_dict = {} for result in self.rerun_results: @@ -149,11 +149,12 @@ class TestResults: if match_tests: match_tests_dict[result.test_name] = match_tests - # Clear previously failed tests - self.rerun_bad.extend(self.bad) - self.bad.clear() - self.env_changed.clear() - self.rerun_results.clear() + if clear: + # Clear previously failed tests + self.rerun_bad.extend(self.bad) + self.bad.clear() + self.env_changed.clear() + self.rerun_results.clear() return (tuple(tests), match_tests_dict) diff --git a/Lib/test/libregrtest/runtests.py b/Lib/test/libregrtest/runtests.py index edd72276320..8e9779524c5 100644 --- a/Lib/test/libregrtest/runtests.py +++ b/Lib/test/libregrtest/runtests.py @@ -2,7 +2,9 @@ import contextlib import dataclasses import json import os +import shlex import subprocess +import sys from typing import Any from test import support @@ -67,6 +69,11 @@ class HuntRefleak: runs: int filename: StrPath + def bisect_cmd_args(self) -> list[str]: + # Ignore filename since it can contain colon (":"), + # and usually it's not used. Use the default filename. + return ["-R", f"{self.warmups}:{self.runs}:"] + @dataclasses.dataclass(slots=True, frozen=True) class RunTests: @@ -137,6 +144,49 @@ class RunTests: or support.is_wasi ) + def create_python_cmd(self) -> list[str]: + python_opts = support.args_from_interpreter_flags() + if self.python_cmd is not None: + executable = self.python_cmd + # Remove -E option, since --python=COMMAND can set PYTHON + # environment variables, such as PYTHONPATH, in the worker + # process. + python_opts = [opt for opt in python_opts if opt != "-E"] + else: + executable = (sys.executable,) + cmd = [*executable, *python_opts] + if '-u' not in python_opts: + cmd.append('-u') # Unbuffered stdout and stderr + if self.coverage: + cmd.append("-Xpresite=test.cov") + return cmd + + def bisect_cmd_args(self) -> list[str]: + args = [] + if self.fail_fast: + args.append("--failfast") + if self.fail_env_changed: + args.append("--fail-env-changed") + if self.timeout: + args.append(f"--timeout={self.timeout}") + if self.hunt_refleak is not None: + args.extend(self.hunt_refleak.bisect_cmd_args()) + if self.test_dir: + args.extend(("--testdir", self.test_dir)) + if self.memory_limit: + args.extend(("--memlimit", self.memory_limit)) + if self.gc_threshold: + args.append(f"--threshold={self.gc_threshold}") + if self.use_resources: + args.extend(("-u", ','.join(self.use_resources))) + if self.python_cmd: + cmd = shlex.join(self.python_cmd) + args.extend(("--python", cmd)) + if self.randomize: + args.append(f"--randomize") + args.append(f"--randseed={self.random_seed}") + return args + @dataclasses.dataclass(slots=True, frozen=True) class WorkerRunTests(RunTests): diff --git a/Lib/test/libregrtest/worker.py b/Lib/test/libregrtest/worker.py index 7a6d33d4499..f8b8e45eca3 100644 --- a/Lib/test/libregrtest/worker.py +++ b/Lib/test/libregrtest/worker.py @@ -3,7 +3,6 @@ import sys import os from typing import Any, NoReturn -from test import support from test.support import os_helper, Py_DEBUG from .setup import setup_process, setup_test_dir @@ -19,23 +18,10 @@ USE_PROCESS_GROUP = (hasattr(os, "setsid") and hasattr(os, "killpg")) def create_worker_process(runtests: WorkerRunTests, output_fd: int, tmp_dir: StrPath | None = None) -> subprocess.Popen: - python_cmd = runtests.python_cmd worker_json = runtests.as_json() - python_opts = support.args_from_interpreter_flags() - if python_cmd is not None: - executable = python_cmd - # Remove -E option, since --python=COMMAND can set PYTHON environment - # variables, such as PYTHONPATH, in the worker process. - python_opts = [opt for opt in python_opts if opt != "-E"] - else: - executable = (sys.executable,) - if runtests.coverage: - python_opts.append("-Xpresite=test.cov") - cmd = [*executable, *python_opts, - '-u', # Unbuffered stdout and stderr - '-m', 'test.libregrtest.worker', - worker_json] + cmd = runtests.create_python_cmd() + cmd.extend(['-m', 'test.libregrtest.worker', worker_json]) env = dict(os.environ) if tmp_dir is not None: diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index 89562fa5eac..b80e0524593 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -399,7 +399,7 @@ class ParseArgsTestCase(unittest.TestCase): self.checkError(['--unknown-option'], 'unrecognized arguments: --unknown-option') - def check_ci_mode(self, args, use_resources, rerun=True): + def create_regrtest(self, args): ns = cmdline._parse_args(args) # Check Regrtest attributes which are more reliable than Namespace @@ -411,6 +411,10 @@ class ParseArgsTestCase(unittest.TestCase): regrtest = main.Regrtest(ns) + return regrtest + + def check_ci_mode(self, args, use_resources, rerun=True): + regrtest = self.create_regrtest(args) self.assertEqual(regrtest.num_workers, -1) self.assertEqual(regrtest.want_rerun, rerun) self.assertTrue(regrtest.randomize) @@ -455,6 +459,11 @@ class ParseArgsTestCase(unittest.TestCase): ns = cmdline._parse_args(args) self.assertFalse(ns._add_python_opts) + def test_bisect(self): + args = ['--bisect'] + regrtest = self.create_regrtest(args) + self.assertTrue(regrtest.want_bisect) + @dataclasses.dataclass(slots=True) class Rerun: @@ -1192,6 +1201,47 @@ class ArgsTestCase(BaseTestCase): def test_huntrleaks_mp(self): self.check_huntrleaks(run_workers=True) + @unittest.skipUnless(support.Py_DEBUG, 'need a debug build') + def test_huntrleaks_bisect(self): + # test --huntrleaks --bisect + code = textwrap.dedent(""" + import unittest + + GLOBAL_LIST = [] + + class RefLeakTest(unittest.TestCase): + def test1(self): + pass + + def test2(self): + pass + + def test3(self): + GLOBAL_LIST.append(object()) + + def test4(self): + pass + """) + + test = self.create_test('huntrleaks', code=code) + + filename = 'reflog.txt' + self.addCleanup(os_helper.unlink, filename) + cmd = ['--huntrleaks', '3:3:', '--bisect', test] + output = self.run_tests(*cmd, + exitcode=EXITCODE_BAD_TEST, + stderr=subprocess.STDOUT) + + self.assertIn(f"Bisect {test}", output) + self.assertIn(f"Bisect {test}: exit code 0", output) + + # test3 is the one which leaks + self.assertIn("Bisection completed in", output) + self.assertIn( + "Tests (1):\n" + f"* {test}.RefLeakTest.test3\n", + output) + @unittest.skipUnless(support.Py_DEBUG, 'need a debug build') def test_huntrleaks_fd_leak(self): # test --huntrleaks for file descriptor leak diff --git a/Misc/NEWS.d/next/Tests/2024-02-18-14-20-52.gh-issue-115122.3rGNo9.rst b/Misc/NEWS.d/next/Tests/2024-02-18-14-20-52.gh-issue-115122.3rGNo9.rst new file mode 100644 index 00000000000..e187a40a405 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-02-18-14-20-52.gh-issue-115122.3rGNo9.rst @@ -0,0 +1,2 @@ +Add ``--bisect`` option to regrtest test runner: run failed tests with +``test.bisect_cmd`` to identify failing tests. Patch by Victor Stinner.