gh-115122: Add --bisect option to regrtest (#115123)

* test.bisect_cmd now exits with code 0 on success, and code 1 on failure. Before, it was the opposite.
* test.bisect_cmd now runs the test worker process with -X faulthandler.
* regrtest RunTests: Add create_python_cmd() and bisect_cmd_args() methods.
2024-11-23 18:04:37 +08:00 · 2024-02-18 21:06:39 +01:00 · 2024-02-18 21:06:39 +01:00 · 1e5719a663
commit 1e5719a663
parent 0c80da4c14
8 changed files with 178 additions and 30 deletions
--- a/Lib/test/bisect_cmd.py
+++ b/Lib/test/bisect_cmd.py
@ -51,6 +51,7 @@ def python_cmd():
    cmd = [sys.executable]
    cmd.extend(subprocess._args_from_interpreter_flags())
    cmd.extend(subprocess._optim_args_from_interpreter_flags())
+    cmd.extend(('-X', 'faulthandler'))
    return cmd


@ -77,9 +78,13 @@ def run_tests(args, tests, huntrleaks=None):
        write_tests(tmp, tests)

        cmd = python_cmd()
-        cmd.extend(['-m', 'test', '--matchfile', tmp])
+        cmd.extend(['-u', '-m', 'test', '--matchfile', tmp])
        cmd.extend(args.test_args)
        print("+ %s" % format_shell_args(cmd))
+
+        sys.stdout.flush()
+        sys.stderr.flush()
+
        proc = subprocess.run(cmd)
        return proc.returncode
    finally:
@ -137,8 +142,8 @@ def main():
            ntest = max(ntest // 2, 1)
            subtests = random.sample(tests, ntest)

-            print("[+] Iteration %s: run %s tests/%s"
-                  % (iteration, len(subtests), len(tests)))
+            print(f"[+] Iteration {iteration}/{args.max_iter}: "
+                  f"run {len(subtests)} tests/{len(tests)}")
            print()

            exitcode = run_tests(args, subtests)
@ -170,10 +175,10 @@ def main():
    if len(tests) <= args.max_tests:
        print("Bisection completed in %s iterations and %s"
              % (iteration, datetime.timedelta(seconds=dt)))
-        sys.exit(1)
    else:
        print("Bisection failed after %s iterations and %s"
              % (iteration, datetime.timedelta(seconds=dt)))
+        sys.exit(1)


 if __name__ == "__main__":
--- a/Lib/test/libregrtest/cmdline.py
+++ b/Lib/test/libregrtest/cmdline.py
@ -347,6 +347,8 @@ def _create_parser():
                       help='override the working directory for the test run')
    group.add_argument('--cleanup', action='store_true',
                       help='remove old test_python_* directories')
+    group.add_argument('--bisect', action='store_true',
+                       help='if some tests fail, run test.bisect_cmd on them')
    group.add_argument('--dont-add-python-opts', dest='_add_python_opts',
                       action='store_false',
                       help="internal option, don't use it")
--- a/Lib/test/libregrtest/main.py
+++ b/Lib/test/libregrtest/main.py
@ -7,8 +7,7 @@ import sysconfig
 import time
 import trace

-from test import support
-from test.support import os_helper, MS_WINDOWS
+from test.support import os_helper, MS_WINDOWS, flush_std_streams

 from .cmdline import _parse_args, Namespace
 from .findtests import findtests, split_test_packages, list_cases
@ -73,6 +72,7 @@ class Regrtest:
        self.want_cleanup: bool = ns.cleanup
        self.want_rerun: bool = ns.rerun
        self.want_run_leaks: bool = ns.runleaks
+        self.want_bisect: bool = ns.bisect

        self.ci_mode: bool = (ns.fast_ci or ns.slow_ci)
        self.want_add_python_opts: bool = (_add_python_opts
@ -273,6 +273,55 @@ class Regrtest:

        self.display_result(rerun_runtests)

+    def _run_bisect(self, runtests: RunTests, test: str, progress: str) -> bool:
+        print()
+        title = f"Bisect {test}"
+        if progress:
+            title = f"{title} ({progress})"
+        print(title)
+        print("#" * len(title))
+        print()
+
+        cmd = runtests.create_python_cmd()
+        cmd.extend([
+            "-u", "-m", "test.bisect_cmd",
+            # Limit to 25 iterations (instead of 100) to not abuse CI resources
+            "--max-iter", "25",
+            "-v",
+            # runtests.match_tests is not used (yet) for bisect_cmd -i arg
+        ])
+        cmd.extend(runtests.bisect_cmd_args())
+        cmd.append(test)
+        print("+", shlex.join(cmd), flush=True)
+
+        flush_std_streams()
+
+        import subprocess
+        proc = subprocess.run(cmd, timeout=runtests.timeout)
+        exitcode = proc.returncode
+
+        title = f"{title}: exit code {exitcode}"
+        print(title)
+        print("#" * len(title))
+        print(flush=True)
+
+        if exitcode:
+            print(f"Bisect failed with exit code {exitcode}")
+            return False
+
+        return True
+
+    def run_bisect(self, runtests: RunTests) -> None:
+        tests, _ = self.results.prepare_rerun(clear=False)
+
+        for index, name in enumerate(tests, 1):
+            if len(tests) > 1:
+                progress = f"{index}/{len(tests)}"
+            else:
+                progress = ""
+            if not self._run_bisect(runtests, name, progress):
+                return
+
    def display_result(self, runtests):
        # If running the test suite for PGO then no one cares about results.
        if runtests.pgo:
@ -466,7 +515,7 @@ class Regrtest:

        setup_process()

-        if self.hunt_refleak and not self.num_workers:
+        if (runtests.hunt_refleak is not None) and (not self.num_workers):
            # gh-109739: WindowsLoadTracker thread interfers with refleak check
            use_load_tracker = False
        else:
@ -486,6 +535,9 @@ class Regrtest:

            if self.want_rerun and self.results.need_rerun():
                self.rerun_failed_tests(runtests)
+
+            if self.want_bisect and self.results.need_rerun():
+                self.run_bisect(runtests)
        finally:
            if use_load_tracker:
                self.logger.stop_load_tracker()
--- a/Lib/test/libregrtest/results.py
+++ b/Lib/test/libregrtest/results.py
@ -138,7 +138,7 @@ class TestResults:
    def need_rerun(self):
        return bool(self.rerun_results)

-    def prepare_rerun(self) -> tuple[TestTuple, FilterDict]:
+    def prepare_rerun(self, *, clear: bool = True) -> tuple[TestTuple, FilterDict]:
        tests: TestList = []
        match_tests_dict = {}
        for result in self.rerun_results:
@ -149,6 +149,7 @@ class TestResults:
            if match_tests:
                match_tests_dict[result.test_name] = match_tests

+        if clear:
            # Clear previously failed tests
            self.rerun_bad.extend(self.bad)
            self.bad.clear()
--- a/Lib/test/libregrtest/runtests.py
+++ b/Lib/test/libregrtest/runtests.py
@ -2,7 +2,9 @@ import contextlib
 import dataclasses
 import json
 import os
+import shlex
 import subprocess
+import sys
 from typing import Any

 from test import support
@ -67,6 +69,11 @@ class HuntRefleak:
    runs: int
    filename: StrPath

+    def bisect_cmd_args(self) -> list[str]:
+        # Ignore filename since it can contain colon (":"),
+        # and usually it's not used. Use the default filename.
+        return ["-R", f"{self.warmups}:{self.runs}:"]
+

@dataclasses.dataclass(slots=True, frozen=True)
 class RunTests:
@ -137,6 +144,49 @@ class RunTests:
            or support.is_wasi
        )

+    def create_python_cmd(self) -> list[str]:
+        python_opts = support.args_from_interpreter_flags()
+        if self.python_cmd is not None:
+            executable = self.python_cmd
+            # Remove -E option, since --python=COMMAND can set PYTHON
+            # environment variables, such as PYTHONPATH, in the worker
+            # process.
+            python_opts = [opt for opt in python_opts if opt != "-E"]
+        else:
+            executable = (sys.executable,)
+        cmd = [*executable, *python_opts]
+        if '-u' not in python_opts:
+            cmd.append('-u')  # Unbuffered stdout and stderr
+        if self.coverage:
+            cmd.append("-Xpresite=test.cov")
+        return cmd
+
+    def bisect_cmd_args(self) -> list[str]:
+        args = []
+        if self.fail_fast:
+            args.append("--failfast")
+        if self.fail_env_changed:
+            args.append("--fail-env-changed")
+        if self.timeout:
+            args.append(f"--timeout={self.timeout}")
+        if self.hunt_refleak is not None:
+            args.extend(self.hunt_refleak.bisect_cmd_args())
+        if self.test_dir:
+            args.extend(("--testdir", self.test_dir))
+        if self.memory_limit:
+            args.extend(("--memlimit", self.memory_limit))
+        if self.gc_threshold:
+            args.append(f"--threshold={self.gc_threshold}")
+        if self.use_resources:
+            args.extend(("-u", ','.join(self.use_resources)))
+        if self.python_cmd:
+            cmd = shlex.join(self.python_cmd)
+            args.extend(("--python", cmd))
+        if self.randomize:
+            args.append(f"--randomize")
+        args.append(f"--randseed={self.random_seed}")
+        return args
+

@dataclasses.dataclass(slots=True, frozen=True)
 class WorkerRunTests(RunTests):
--- a/Lib/test/libregrtest/worker.py
+++ b/Lib/test/libregrtest/worker.py
@ -3,7 +3,6 @@ import sys
 import os
 from typing import Any, NoReturn

-from test import support
 from test.support import os_helper, Py_DEBUG

 from .setup import setup_process, setup_test_dir
@ -19,23 +18,10 @@ USE_PROCESS_GROUP = (hasattr(os, "setsid") and hasattr(os, "killpg"))

 def create_worker_process(runtests: WorkerRunTests, output_fd: int,
                          tmp_dir: StrPath | None = None) -> subprocess.Popen:
-    python_cmd = runtests.python_cmd
    worker_json = runtests.as_json()

-    python_opts = support.args_from_interpreter_flags()
-    if python_cmd is not None:
-        executable = python_cmd
-        # Remove -E option, since --python=COMMAND can set PYTHON environment
-        # variables, such as PYTHONPATH, in the worker process.
-        python_opts = [opt for opt in python_opts if opt != "-E"]
-    else:
-        executable = (sys.executable,)
-    if runtests.coverage:
-        python_opts.append("-Xpresite=test.cov")
-    cmd = [*executable, *python_opts,
-           '-u',    # Unbuffered stdout and stderr
-           '-m', 'test.libregrtest.worker',
-           worker_json]
+    cmd = runtests.create_python_cmd()
+    cmd.extend(['-m', 'test.libregrtest.worker', worker_json])

    env = dict(os.environ)
    if tmp_dir is not None:
--- a/Lib/test/test_regrtest.py
+++ b/Lib/test/test_regrtest.py
@ -399,7 +399,7 @@ class ParseArgsTestCase(unittest.TestCase):
        self.checkError(['--unknown-option'],
                        'unrecognized arguments: --unknown-option')

-    def check_ci_mode(self, args, use_resources, rerun=True):
+    def create_regrtest(self, args):
        ns = cmdline._parse_args(args)

        # Check Regrtest attributes which are more reliable than Namespace
@ -411,6 +411,10 @@ class ParseArgsTestCase(unittest.TestCase):

            regrtest = main.Regrtest(ns)

+        return regrtest
+
+    def check_ci_mode(self, args, use_resources, rerun=True):
+        regrtest = self.create_regrtest(args)
        self.assertEqual(regrtest.num_workers, -1)
        self.assertEqual(regrtest.want_rerun, rerun)
        self.assertTrue(regrtest.randomize)
@ -455,6 +459,11 @@ class ParseArgsTestCase(unittest.TestCase):
        ns = cmdline._parse_args(args)
        self.assertFalse(ns._add_python_opts)

+    def test_bisect(self):
+        args = ['--bisect']
+        regrtest = self.create_regrtest(args)
+        self.assertTrue(regrtest.want_bisect)
+

@dataclasses.dataclass(slots=True)
 class Rerun:
@ -1192,6 +1201,47 @@ class ArgsTestCase(BaseTestCase):
    def test_huntrleaks_mp(self):
        self.check_huntrleaks(run_workers=True)

+    @unittest.skipUnless(support.Py_DEBUG, 'need a debug build')
+    def test_huntrleaks_bisect(self):
+        # test --huntrleaks --bisect
+        code = textwrap.dedent("""
+            import unittest
+
+            GLOBAL_LIST = []
+
+            class RefLeakTest(unittest.TestCase):
+                def test1(self):
+                    pass
+
+                def test2(self):
+                    pass
+
+                def test3(self):
+                    GLOBAL_LIST.append(object())
+
+                def test4(self):
+                    pass
+        """)
+
+        test = self.create_test('huntrleaks', code=code)
+
+        filename = 'reflog.txt'
+        self.addCleanup(os_helper.unlink, filename)
+        cmd = ['--huntrleaks', '3:3:', '--bisect', test]
+        output = self.run_tests(*cmd,
+                                exitcode=EXITCODE_BAD_TEST,
+                                stderr=subprocess.STDOUT)
+
+        self.assertIn(f"Bisect {test}", output)
+        self.assertIn(f"Bisect {test}: exit code 0", output)
+
+        # test3 is the one which leaks
+        self.assertIn("Bisection completed in", output)
+        self.assertIn(
+            "Tests (1):\n"
+            f"* {test}.RefLeakTest.test3\n",
+            output)
+
    @unittest.skipUnless(support.Py_DEBUG, 'need a debug build')
    def test_huntrleaks_fd_leak(self):
        # test --huntrleaks for file descriptor leak
--- a/Misc/NEWS.d/next/Tests/2024-02-18-14-20-52.gh-issue-115122.3rGNo9.rst
+++ b/Misc/NEWS.d/next/Tests/2024-02-18-14-20-52.gh-issue-115122.3rGNo9.rst
@ -0,0 +1,2 @@
+Add ``--bisect`` option to regrtest test runner: run failed tests with
+``test.bisect_cmd`` to identify failing tests. Patch by Victor Stinner.