diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 430daf69d29..c17bbb48b65 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -559,6 +559,23 @@ class TracebackErrorLocationCaretTestBase:
         result_lines = self.get_exception(f_with_binary_operator)
         self.assertEqual(result_lines, expected_error.splitlines())
 
+    def test_caret_for_binary_operators_with_unicode(self):
+        def f_with_binary_operator():
+            áóí = 20
+            return 10 + áóí / 0 + 30
+
+        lineno_f = f_with_binary_operator.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            f'  File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n'
+            '    return 10 + áóí / 0 + 30\n'
+            '                ~~~~^~~\n'
+        )
+        result_lines = self.get_exception(f_with_binary_operator)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
     def test_caret_for_binary_operators_two_char(self):
         def f_with_binary_operator():
             divisor = 20
@@ -593,6 +610,23 @@ class TracebackErrorLocationCaretTestBase:
         result_lines = self.get_exception(f_with_subscript)
         self.assertEqual(result_lines, expected_error.splitlines())
 
+    def test_caret_for_subscript_unicode(self):
+        def f_with_subscript():
+            some_dict = {'ó': {'á': {'í': {'theta': 1}}}}
+            return some_dict['ó']['á']['í']['beta']
+
+        lineno_f = f_with_subscript.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            f'  File "{__file__}", line {lineno_f+2}, in f_with_subscript\n'
+            "    return some_dict['ó']['á']['í']['beta']\n"
+            '           ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^\n'
+        )
+        result_lines = self.get_exception(f_with_subscript)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
     def test_traceback_specialization_with_syntax_error(self):
         bytecode = compile("1 / 0 / 1 / 2\n", TESTFN, "exec")
 
@@ -3356,7 +3390,7 @@ class SuggestionFormattingTestBase:
 
         actual = self.get_suggestion(func)
         self.assertNotIn("blech", actual)
-    
+
     def test_name_error_with_instance(self):
         class A:
             def __init__(self):
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 8d518728fa1..c43c4720ae5 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -586,12 +586,15 @@ def _extract_caret_anchors_from_line_segment(segment):
     if len(tree.body) != 1:
         return None
 
+    normalize = lambda offset: _byte_offset_to_character_offset(segment, offset)
     statement = tree.body[0]
     match statement:
         case ast.Expr(expr):
             match expr:
                 case ast.BinOp():
-                    operator_str = segment[expr.left.end_col_offset:expr.right.col_offset]
+                    operator_start = normalize(expr.left.end_col_offset)
+                    operator_end = normalize(expr.right.col_offset)
+                    operator_str = segment[operator_start:operator_end]
                     operator_offset = len(operator_str) - len(operator_str.lstrip())
 
                     left_anchor = expr.left.end_col_offset + operator_offset
@@ -601,9 +604,11 @@ def _extract_caret_anchors_from_line_segment(segment):
                         and not operator_str[operator_offset + 1].isspace()
                     ):
                         right_anchor += 1
-                    return _Anchors(left_anchor, right_anchor)
+                    return _Anchors(normalize(left_anchor), normalize(right_anchor))
                 case ast.Subscript():
-                    return _Anchors(expr.value.end_col_offset, expr.slice.end_col_offset + 1)
+                    subscript_start = normalize(expr.value.end_col_offset)
+                    subscript_end = normalize(expr.slice.end_col_offset + 1)
+                    return _Anchors(subscript_start, subscript_end)
 
     return None
 
@@ -1044,7 +1049,7 @@ def _compute_suggestion_error(exc_value, tb, wrong_name):
             self = frame.f_locals['self']
             if hasattr(self, wrong_name):
                 return f"self.{wrong_name}"
-    
+
     # Compute closest match
 
     if len(d) > _MAX_CANDIDATE_ITEMS:
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-06-00-17-58.gh-issue-99103.bFA9BX.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-06-00-17-58.gh-issue-99103.bFA9BX.rst
new file mode 100644
index 00000000000..f5378eb837d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-06-00-17-58.gh-issue-99103.bFA9BX.rst
@@ -0,0 +1,2 @@
+Fix the error reporting positions of specialized traceback anchors when the
+source line contains Unicode characters.
diff --git a/Python/traceback.c b/Python/traceback.c
index aacdb33d39b..356e6436483 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -700,8 +700,13 @@ extract_anchors_from_line(PyObject *filename, PyObject *line,
 
 done:
     if (res > 0) {
-        *left_anchor += start_offset;
-        *right_anchor += start_offset;
+        // Convert the AST byte offsets to character offsets and adjust them with
+        // the start of the actual line (instead of the source code segment).
+        assert(segment != NULL);
+        assert(*left_anchor >= 0);
+        assert(*right_anchor >= 0);
+        *left_anchor = _PyPegen_byte_offset_to_character_offset(segment, *left_anchor) + start_offset;
+        *right_anchor = _PyPegen_byte_offset_to_character_offset(segment, *right_anchor) + start_offset;
     }
     Py_XDECREF(segment);
     if (arena) {