From e95984826eb3cdb3a3baedb2ccea35e11e9f8161 Mon Sep 17 00:00:00 2001 From: aorcajo <589252+aorcajo@users.noreply.github.com> Date: Fri, 6 Sep 2024 15:40:29 +0200 Subject: [PATCH] gh-119310: Fix encoding when reading old history file (#121779) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Langa --- Lib/_pyrepl/readline.py | 12 +++-- Lib/test/test_pyrepl/test_pyrepl.py | 45 +++++++++++++++++++ Lib/test/test_repl.py | 5 ++- ...-09-06-14-13-01.gh-issue-119310.WQxyDF.rst | 3 ++ 4 files changed, 59 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-06-14-13-01.gh-issue-119310.WQxyDF.rst diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index 4929dd31710..5e1d3085874 100644 --- a/Lib/_pyrepl/readline.py +++ b/Lib/_pyrepl/readline.py @@ -427,12 +427,16 @@ class _ReadlineWrapper: history = self.get_reader().history with open(os.path.expanduser(filename), 'rb') as f: - lines = [line.decode('utf-8', errors='replace') for line in f.read().split(b'\n')] + is_editline = f.readline().startswith(b"_HiStOrY_V2_") + if is_editline: + encoding = "unicode-escape" + else: + f.seek(0) + encoding = "utf-8" + + lines = [line.decode(encoding, errors='replace') for line in f.read().split(b'\n')] buffer = [] for line in lines: - # Ignore readline history file header - if line.startswith("_HiStOrY_V2_"): - continue if line.endswith("\r"): buffer.append(line+'\n') else: diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index 84030e05d2a..e816de37206 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -1216,12 +1216,34 @@ class TestMain(TestCase): *, cmdline_args: list[str] | None = None, cwd: str | None = None, + ) -> tuple[str, int]: + temp_dir = None + if cwd is None: + temp_dir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) + cwd = temp_dir.name + try: + return self._run_repl( + repl_input, env=env, cmdline_args=cmdline_args, cwd=cwd + ) + finally: + if temp_dir is not None: + temp_dir.cleanup() + + def _run_repl( + self, + repl_input: str | list[str], + *, + env: dict | None, + cmdline_args: list[str] | None, + cwd: str, ) -> tuple[str, int]: assert pty master_fd, slave_fd = pty.openpty() cmd = [sys.executable, "-i", "-u"] if env is None: cmd.append("-I") + elif "PYTHON_HISTORY" not in env: + env["PYTHON_HISTORY"] = os.path.join(cwd, ".regrtest_history") if cmdline_args is not None: cmd.extend(cmdline_args) process = subprocess.Popen( @@ -1260,3 +1282,26 @@ class TestMain(TestCase): process.kill() exit_code = process.wait() return "".join(output), exit_code + + def test_readline_history_file(self): + # skip, if readline module is not available + readline = import_module('readline') + if readline.backend != "editline": + self.skipTest("GNU readline is not affected by this issue") + + hfile = tempfile.NamedTemporaryFile() + self.addCleanup(unlink, hfile.name) + env = os.environ.copy() + env["PYTHON_HISTORY"] = hfile.name + + env["PYTHON_BASIC_REPL"] = "1" + output, exit_code = self.run_repl("spam \nexit()\n", env=env) + self.assertEqual(exit_code, 0) + self.assertIn("spam ", output) + self.assertNotEqual(pathlib.Path(hfile.name).stat().st_size, 0) + self.assertIn("spam\\040", pathlib.Path(hfile.name).read_text()) + + env.pop("PYTHON_BASIC_REPL", None) + output, exit_code = self.run_repl("exit\n", env=env) + self.assertEqual(exit_code, 0) + self.assertNotIn("\\040", pathlib.Path(hfile.name).read_text()) diff --git a/Lib/test/test_repl.py b/Lib/test/test_repl.py index 363808cb444..cd8ef0f1057 100644 --- a/Lib/test/test_repl.py +++ b/Lib/test/test_repl.py @@ -41,7 +41,7 @@ def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw): # path may be used by Py_GetPath() to build the default module search # path. stdin_fname = os.path.join(os.path.dirname(sys.executable), "") - cmd_line = [stdin_fname, '-E', '-i'] + cmd_line = [stdin_fname, '-I', '-i'] cmd_line.extend(args) # Set TERM=vt100, for the rationale see the comments in spawn_python() of @@ -228,6 +228,7 @@ class TestInteractiveInterpreter(unittest.TestCase): f.write("exit(0)" + os.linesep) env = os.environ.copy() + env["PYTHON_HISTORY"] = os.path.join(tmpdir, ".asyncio_history") env["PYTHONSTARTUP"] = script subprocess.check_call( [sys.executable, "-m", "asyncio"], @@ -240,7 +241,7 @@ class TestInteractiveInterpreter(unittest.TestCase): @unittest.skipUnless(pty, "requires pty") def test_asyncio_repl_is_ok(self): m, s = pty.openpty() - cmd = [sys.executable, "-m", "asyncio"] + cmd = [sys.executable, "-I", "-m", "asyncio"] proc = subprocess.Popen( cmd, stdin=s, diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-06-14-13-01.gh-issue-119310.WQxyDF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-06-14-13-01.gh-issue-119310.WQxyDF.rst new file mode 100644 index 00000000000..e7bc24b537d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-06-14-13-01.gh-issue-119310.WQxyDF.rst @@ -0,0 +1,3 @@ +Allow the new interactive shell to read history files written with the +editline library that use unicode-escaped entries. Patch by aorcajo and +Łukasz Langa.