mirror of
https://github.com/python/cpython.git
synced 2024-11-24 00:38:00 +01:00
bpo-43950: Specialize tracebacks for subscripts/binary ops (GH-27037)
Co-authored-by: Ammar Askar <ammar@ammaraskar.com> Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
This commit is contained in:
parent
da2e673c53
commit
1890dd235f
@ -473,7 +473,7 @@ The output for the example would look similar to this:
|
||||
['Traceback (most recent call last):\n',
|
||||
' File "<doctest default[0]>", line 10, in <module>\n lumberjack()\n ^^^^^^^^^^^^\n',
|
||||
' File "<doctest default[0]>", line 4, in lumberjack\n bright_side_of_death()\n ^^^^^^^^^^^^^^^^^^^^^^\n',
|
||||
' File "<doctest default[0]>", line 7, in bright_side_of_death\n return tuple()[0]\n ^^^^^^^^^^\n',
|
||||
' File "<doctest default[0]>", line 7, in bright_side_of_death\n return tuple()[0]\n ~~~~~~~^^^\n',
|
||||
'IndexError: tuple index out of range\n']
|
||||
*** extract_tb:
|
||||
[<FrameSummary file <doctest...>, line 10 in <module>>,
|
||||
@ -482,7 +482,7 @@ The output for the example would look similar to this:
|
||||
*** format_tb:
|
||||
[' File "<doctest default[0]>", line 10, in <module>\n lumberjack()\n ^^^^^^^^^^^^\n',
|
||||
' File "<doctest default[0]>", line 4, in lumberjack\n bright_side_of_death()\n ^^^^^^^^^^^^^^^^^^^^^^\n',
|
||||
' File "<doctest default[0]>", line 7, in bright_side_of_death\n return tuple()[0]\n ^^^^^^^^^^\n']
|
||||
' File "<doctest default[0]>", line 7, in bright_side_of_death\n return tuple()[0]\n ~~~~~~~^^^\n']
|
||||
*** tb_lineno: 10
|
||||
|
||||
|
||||
|
@ -12,9 +12,11 @@ from test.support import (Error, captured_output, cpython_only, ALWAYS_EQ,
|
||||
requires_debug_ranges, has_no_debug_ranges)
|
||||
from test.support.os_helper import TESTFN, unlink
|
||||
from test.support.script_helper import assert_python_ok, assert_python_failure
|
||||
import textwrap
|
||||
|
||||
import os
|
||||
import textwrap
|
||||
import traceback
|
||||
from functools import partial
|
||||
|
||||
|
||||
test_code = namedtuple('code', ['co_filename', 'co_name'])
|
||||
@ -406,6 +408,82 @@ class TracebackErrorLocationCaretTests(unittest.TestCase):
|
||||
result_lines = self.get_exception(f_with_multiline)
|
||||
self.assertEqual(result_lines, expected_f.splitlines())
|
||||
|
||||
def test_caret_for_binary_operators(self):
    # A failing binary operation is expected to be rendered with '^' under
    # the operator and '~' under both operands.
    #
    # Keep the layout of the helper below unchanged: the expected traceback
    # addresses its ``return`` statement as ``co_firstlineno + 2``.
    def f_with_binary_operator():
        divisor = 20
        return 10 + divisor / 0 + 30

    lineno_f = f_with_binary_operator.__code__.co_firstlineno
    # Traceback layout: "  File ..." headers, source lines indented by four
    # spaces, and marker lines padded so they sit under the failing range
    # (here ``divisor / 0``, 12 columns into the stripped source line).
    expected_error = (
        'Traceback (most recent call last):\n'
        f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
        '    callable()\n'
        '    ^^^^^^^^^^\n'
        f'  File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n'
        '    return 10 + divisor / 0 + 30\n'
        '                ~~~~~~~~^~~\n'
    )
    result_lines = self.get_exception(f_with_binary_operator)
    self.assertEqual(result_lines, expected_error.splitlines())
|
||||
|
||||
def test_caret_for_binary_operators_two_char(self):
    # Same as the single-character operator case, but with ``//``: both
    # characters of the operator are expected to be marked with '^'.
    #
    # Keep the layout of the helper below unchanged: the expected traceback
    # addresses its ``return`` statement as ``co_firstlineno + 2``.
    def f_with_binary_operator():
        divisor = 20
        return 10 + divisor // 0 + 30

    lineno_f = f_with_binary_operator.__code__.co_firstlineno
    # Marker line: four spaces of source indent plus the 12 columns that
    # precede ``divisor // 0`` in the stripped source line.
    expected_error = (
        'Traceback (most recent call last):\n'
        f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
        '    callable()\n'
        '    ^^^^^^^^^^\n'
        f'  File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n'
        '    return 10 + divisor // 0 + 30\n'
        '                ~~~~~~~~^^~~\n'
    )
    result_lines = self.get_exception(f_with_binary_operator)
    self.assertEqual(result_lines, expected_error.splitlines())
|
||||
|
||||
def test_caret_for_subscript(self):
    # A failing subscript is expected to be rendered with '~' under the
    # subscripted value and '^' under the failing ``[...]`` part.
    #
    # Keep the layout of the helper below unchanged: the expected traceback
    # addresses its ``return`` statement as ``co_firstlineno + 2``.
    def f_with_subscript():
        some_dict = {'x': {'y': None}}
        return some_dict['x']['y']['z']

    lineno_f = f_with_subscript.__code__.co_firstlineno
    # Marker line: four spaces of source indent plus the 7 columns taken by
    # "return " in the stripped source line.
    expected_error = (
        'Traceback (most recent call last):\n'
        f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
        '    callable()\n'
        '    ^^^^^^^^^^\n'
        f'  File "{__file__}", line {lineno_f+2}, in f_with_subscript\n'
        "    return some_dict['x']['y']['z']\n"
        '           ~~~~~~~~~~~~~~~~~~~^^^^^\n'
    )
    result_lines = self.get_exception(f_with_subscript)
    self.assertEqual(result_lines, expected_error.splitlines())
|
||||
|
||||
def test_traceback_specialization_with_syntax_error(self):
    # The code object is compiled from valid source, but the file it names
    # is then written with invalid syntax.  The line displayed in the
    # traceback therefore cannot be parsed, and the expected output falls
    # back to plain '^' markers without any '~'.
    bytecode = compile("1 / 0 / 1 / 2\n", TESTFN, "exec")

    with open(TESTFN, "w") as file:
        # make the file's contents invalid
        file.write("1 $ 0 / 1 / 2\n")
    self.addCleanup(unlink, TESTFN)

    func = partial(exec, bytecode)
    result_lines = self.get_exception(func)

    lineno_f = bytecode.co_firstlineno
    # Traceback layout: "  File ..." headers, source and marker lines
    # indented by four spaces; the failing range starts at column 0.
    expected_error = (
        'Traceback (most recent call last):\n'
        f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
        '    callable()\n'
        '    ^^^^^^^^^^\n'
        f'  File "{TESTFN}", line {lineno_f}, in <module>\n'
        "    1 $ 0 / 1 / 2\n"
        '    ^^^^^\n'
    )
    self.assertEqual(result_lines, expected_error.splitlines())
|
||||
|
||||
@cpython_only
|
||||
@requires_debug_ranges()
|
||||
@ -1615,7 +1693,7 @@ class TestTracebackException(unittest.TestCase):
|
||||
self.assertEqual(
|
||||
output.getvalue().split('\n')[-5:],
|
||||
[' x/0',
|
||||
' ^^^',
|
||||
' ~^~',
|
||||
' x = 12',
|
||||
'ZeroDivisionError: division by zero',
|
||||
''])
|
||||
|
@ -494,9 +494,23 @@ class StackSummary(list):
|
||||
colno = _byte_offset_to_character_offset(frame._original_line, frame.colno)
|
||||
end_colno = _byte_offset_to_character_offset(frame._original_line, frame.end_colno)
|
||||
|
||||
try:
|
||||
anchors = _extract_caret_anchors_from_line_segment(
|
||||
frame._original_line[colno - 1:end_colno]
|
||||
)
|
||||
except Exception:
|
||||
anchors = None
|
||||
|
||||
row.append(' ')
|
||||
row.append(' ' * (colno - stripped_characters))
|
||||
row.append('^' * (end_colno - colno))
|
||||
|
||||
if anchors:
|
||||
row.append(anchors.primary_char * (anchors.left_end_offset))
|
||||
row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
|
||||
row.append(anchors.primary_char * (end_colno - colno - anchors.right_start_offset))
|
||||
else:
|
||||
row.append('^' * (end_colno - colno))
|
||||
|
||||
row.append('\n')
|
||||
|
||||
if frame.locals:
|
||||
@ -520,6 +534,50 @@ def _byte_offset_to_character_offset(str, offset):
|
||||
return len(as_utf8[:offset + 1].decode("utf-8"))
|
||||
|
||||
|
||||
_Anchors = collections.namedtuple(
|
||||
"_Anchors",
|
||||
[
|
||||
"left_end_offset",
|
||||
"right_start_offset",
|
||||
"primary_char",
|
||||
"secondary_char",
|
||||
],
|
||||
defaults=["~", "^"]
|
||||
)
|
||||
|
||||
def _extract_caret_anchors_from_line_segment(segment):
|
||||
import ast
|
||||
|
||||
try:
|
||||
tree = ast.parse(segment)
|
||||
except SyntaxError:
|
||||
return None
|
||||
|
||||
if len(tree.body) != 1:
|
||||
return None
|
||||
|
||||
statement = tree.body[0]
|
||||
match statement:
|
||||
case ast.Expr(expr):
|
||||
match expr:
|
||||
case ast.BinOp():
|
||||
operator_str = segment[expr.left.end_col_offset:expr.right.col_offset]
|
||||
operator_offset = len(operator_str) - len(operator_str.lstrip())
|
||||
|
||||
left_anchor = expr.left.end_col_offset + operator_offset
|
||||
right_anchor = left_anchor + 1
|
||||
if (
|
||||
operator_offset + 1 < len(operator_str)
|
||||
and not operator_str[operator_offset + 1].isspace()
|
||||
):
|
||||
right_anchor += 1
|
||||
return _Anchors(left_anchor, right_anchor)
|
||||
case ast.Subscript():
|
||||
return _Anchors(expr.value.end_col_offset, expr.slice.end_col_offset + 1)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class TracebackException:
|
||||
"""An exception ready for rendering.
|
||||
|
||||
|
@ -7,6 +7,10 @@
|
||||
#include "pycore_interp.h" // PyInterpreterState.gc
|
||||
#include "frameobject.h" // PyFrame_GetBack()
|
||||
#include "pycore_frame.h" // _PyFrame_GetCode()
|
||||
#include "pycore_pyarena.h" // _PyArena_Free()
|
||||
#include "pycore_ast.h" // asdl_seq_*
|
||||
#include "pycore_compile.h" // _PyAST_Optimize
|
||||
#include "pycore_parser.h" // _PyParser_ASTFromString
|
||||
#include "../Parser/pegen.h" // _PyPegen_byte_offset_to_character_offset()
|
||||
#include "structmember.h" // PyMemberDef
|
||||
#include "osdefs.h" // SEP
|
||||
@ -512,8 +516,172 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i
|
||||
return err;
|
||||
}
|
||||
|
||||
/* AST based Traceback Specialization
|
||||
*
|
||||
* When displaying a new traceback line, for certain syntactical constructs
|
||||
* (e.g. a subscript, an arithmetic operation) we try to create a representation
|
||||
* that separates the primary source of error from the rest.
|
||||
*
|
||||
* Example specialization of BinOp nodes:
|
||||
* Traceback (most recent call last):
|
||||
* File "/home/isidentical/cpython/cpython/t.py", line 10, in <module>
|
||||
* add_values(1, 2, 'x', 3, 4)
|
||||
* ^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
* File "/home/isidentical/cpython/cpython/t.py", line 2, in add_values
|
||||
* return a + b + c + d + e
|
||||
* ~~~~~~^~~
|
||||
* TypeError: unsupported operand type(s) for +: 'int' and 'str'
|
||||
*/
|
||||
|
||||
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
|
||||
|
||||
static int
|
||||
extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
|
||||
char** primary_error_char, char** secondary_error_char)
|
||||
{
|
||||
switch (expr->kind) {
|
||||
case BinOp_kind: {
|
||||
expr_ty left = expr->v.BinOp.left;
|
||||
expr_ty right = expr->v.BinOp.right;
|
||||
for (int i = left->end_col_offset + 1; i < right->col_offset; i++) {
|
||||
if (IS_WHITESPACE(segment_str[i])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
*left_anchor = i;
|
||||
*right_anchor = i + 1;
|
||||
|
||||
// Check whether if this a two-character operator (e.g //)
|
||||
if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) {
|
||||
++*right_anchor;
|
||||
}
|
||||
|
||||
// Set the error characters
|
||||
*primary_error_char = "~";
|
||||
*secondary_error_char = "^";
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
case Subscript_kind: {
|
||||
*left_anchor = expr->v.Subscript.value->end_col_offset;
|
||||
*right_anchor = expr->v.Subscript.slice->end_col_offset + 1;
|
||||
|
||||
// Set the error characters
|
||||
*primary_error_char = "~";
|
||||
*secondary_error_char = "^";
|
||||
return 1;
|
||||
}
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
|
||||
char** primary_error_char, char** secondary_error_char)
|
||||
{
|
||||
switch (statement->kind) {
|
||||
case Expr_kind: {
|
||||
return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor,
|
||||
primary_error_char, secondary_error_char);
|
||||
}
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
extract_anchors_from_line(PyObject *filename, PyObject *line,
|
||||
Py_ssize_t start_offset, Py_ssize_t end_offset,
|
||||
Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
|
||||
char** primary_error_char, char** secondary_error_char)
|
||||
{
|
||||
int res = -1;
|
||||
PyArena *arena = NULL;
|
||||
PyObject *segment = PyUnicode_Substring(line, start_offset, end_offset);
|
||||
if (!segment) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
const char *segment_str = PyUnicode_AsUTF8(segment);
|
||||
if (!segment) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
arena = _PyArena_New();
|
||||
if (!arena) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
PyCompilerFlags flags = _PyCompilerFlags_INIT;
|
||||
|
||||
_PyASTOptimizeState state;
|
||||
state.optimize = _Py_GetConfig()->optimization_level;
|
||||
state.ff_features = 0;
|
||||
|
||||
mod_ty module = _PyParser_ASTFromString(segment_str, filename, Py_file_input,
|
||||
&flags, arena);
|
||||
if (!module) {
|
||||
goto done;
|
||||
}
|
||||
if (!_PyAST_Optimize(module, arena, &state)) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
assert(module->kind == Module_kind);
|
||||
if (asdl_seq_LEN(module->v.Module.body) == 1) {
|
||||
stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0);
|
||||
res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor,
|
||||
primary_error_char, secondary_error_char);
|
||||
} else {
|
||||
res = 0;
|
||||
}
|
||||
|
||||
done:
|
||||
if (res > 0) {
|
||||
*left_anchor += start_offset;
|
||||
*right_anchor += start_offset;
|
||||
}
|
||||
Py_XDECREF(segment);
|
||||
if (arena) {
|
||||
_PyArena_Free(arena);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
#define _TRACEBACK_SOURCE_LINE_INDENT 4
|
||||
|
||||
static inline int
|
||||
ignore_source_errors(void) {
|
||||
if (PyErr_Occurred()) {
|
||||
if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
|
||||
return -1;
|
||||
}
|
||||
PyErr_Clear();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
print_error_location_carets(PyObject *f, int offset, Py_ssize_t start_offset, Py_ssize_t end_offset,
|
||||
Py_ssize_t right_start_offset, Py_ssize_t left_end_offset,
|
||||
const char *primary, const char *secondary) {
|
||||
int err = 0;
|
||||
int special_chars = (left_end_offset != -1 || right_start_offset != -1);
|
||||
while (++offset <= end_offset) {
|
||||
if (offset <= start_offset || offset > end_offset) {
|
||||
err = PyFile_WriteString(" ", f);
|
||||
} else if (special_chars && left_end_offset < offset && offset <= right_start_offset) {
|
||||
err = PyFile_WriteString(secondary, f);
|
||||
} else {
|
||||
err = PyFile_WriteString(primary, f);
|
||||
}
|
||||
}
|
||||
err = PyFile_WriteString("\n", f);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int
|
||||
tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno,
|
||||
PyFrameObject *frame, PyObject *name)
|
||||
@ -533,52 +701,68 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
|
||||
return err;
|
||||
int truncation = _TRACEBACK_SOURCE_LINE_INDENT;
|
||||
PyObject* source_line = NULL;
|
||||
/* ignore errors since we can't report them, can we? */
|
||||
if (!_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT,
|
||||
&truncation, &source_line)) {
|
||||
int code_offset = tb->tb_lasti;
|
||||
PyCodeObject* code = _PyFrame_GetCode(frame);
|
||||
|
||||
int start_line;
|
||||
int end_line;
|
||||
int start_col_byte_offset;
|
||||
int end_col_byte_offset;
|
||||
if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
|
||||
&end_line, &end_col_byte_offset)) {
|
||||
goto done;
|
||||
}
|
||||
if (start_line != end_line) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (start_col_byte_offset < 0 || end_col_byte_offset < 0) {
|
||||
goto done;
|
||||
}
|
||||
// Convert the utf-8 byte offset to the actual character offset so we
|
||||
// print the right number of carets.
|
||||
Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
|
||||
Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
|
||||
|
||||
char offset = truncation;
|
||||
while (++offset <= start_offset) {
|
||||
err = PyFile_WriteString(" ", f);
|
||||
if (err < 0) {
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
while (++offset <= end_offset + 1) {
|
||||
err = PyFile_WriteString("^", f);
|
||||
if (err < 0) {
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
err = PyFile_WriteString("\n", f);
|
||||
if (_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT,
|
||||
&truncation, &source_line) != 0) {
|
||||
/* ignore errors since we can't report them, can we? */
|
||||
err = ignore_source_errors();
|
||||
goto done;
|
||||
}
|
||||
|
||||
else {
|
||||
PyErr_Clear();
|
||||
int code_offset = tb->tb_lasti;
|
||||
PyCodeObject* code = _PyFrame_GetCode(frame);
|
||||
|
||||
int start_line;
|
||||
int end_line;
|
||||
int start_col_byte_offset;
|
||||
int end_col_byte_offset;
|
||||
if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
|
||||
&end_line, &end_col_byte_offset)) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (start_line != end_line) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (start_col_byte_offset < 0 || end_col_byte_offset < 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
// When displaying errors, we will use the following generic structure:
|
||||
//
|
||||
// ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE
|
||||
// ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~
|
||||
// | |-> left_end_offset | |-> end_offset
|
||||
// |-> start_offset |-> right_start_offset
|
||||
//
|
||||
// In general we will only have (start_offset, end_offset) but we can gather more information
|
||||
// by analyzing the AST of the text between *start_offset* and *end_offset*. If this succeeds
|
||||
// we could get *left_end_offset* and *right_start_offset* and some selection of characters for
|
||||
// the different ranges (primary_error_char and secondary_error_char). If we cannot obtain the
|
||||
// AST information or we cannot identify special ranges within it, then left_end_offset and
|
||||
// right_start_offset will be set to -1.
|
||||
|
||||
// Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
|
||||
assert(source_line);
|
||||
Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
|
||||
Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
|
||||
Py_ssize_t left_end_offset = -1;
|
||||
Py_ssize_t right_start_offset = -1;
|
||||
|
||||
char *primary_error_char = "^";
|
||||
char *secondary_error_char = primary_error_char;
|
||||
|
||||
int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
|
||||
&left_end_offset, &right_start_offset,
|
||||
&primary_error_char, &secondary_error_char);
|
||||
if (res < 0 && ignore_source_errors() < 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = print_error_location_carets(f, truncation, start_offset, end_offset,
|
||||
right_start_offset, left_end_offset,
|
||||
primary_error_char, secondary_error_char);
|
||||
|
||||
done:
|
||||
Py_XDECREF(source_line);
|
||||
return err;
|
||||
|
Loading…
Reference in New Issue
Block a user