Skip to content

Commit

Permalink
bpo-43914: Highlight invalid ranges in SyntaxErrors (#25525)
Browse files Browse the repository at this point in the history
To make it easier to understand which part of the source a SyntaxError message refers to, we can highlight the whole error range instead of only placing the caret at its first character. In this way:

>>> foo(x, z for z in range(10), t, w)
  File "<stdin>", line 1
    foo(x, z for z in range(10), t, w)
           ^
SyntaxError: Generator expression must be parenthesized

becomes

>>> foo(x, z for z in range(10), t, w)
  File "<stdin>", line 1
    foo(x, z for z in range(10), t, w)
           ^^^^^^^^^^^^^^^^^^^^
SyntaxError: Generator expression must be parenthesized
  • Loading branch information
pablogsal authored Apr 23, 2021
1 parent 91b69b7 commit a77aac4
Show file tree
Hide file tree
Showing 17 changed files with 1,060 additions and 592 deletions.
12 changes: 12 additions & 0 deletions Doc/library/exceptions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,18 @@ The following exceptions are the exceptions that are usually raised.

The source code text involved in the error.

.. attribute:: end_lineno

The line number in the file on which the error ends. This is
1-indexed: the first line in the file has a ``lineno`` of 1.

.. attribute:: end_offset

The column in the end line at which the error ends. This is
1-indexed: the first character in the line has an ``offset`` of 1.

.. versionchanged:: 3.10
Added the :attr:`end_lineno` and :attr:`end_offset` attributes.

.. exception:: IndentationError

Expand Down
25 changes: 25 additions & 0 deletions Doc/whatsnew/3.10.rst
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,31 @@ These improvements are inspired by previous work in the PyPy interpreter.
(Contributed by Pablo Galindo in :issue:`42864` and Batuhan Taskaya in
:issue:`40176`.)
:exc:`SyntaxError` exceptions raised by the interpreter will now highlight the
full error range of the expression that constitutes the syntax error itself,
instead of just where the problem is detected. In this way, instead of displaying
(before Python 3.10):
.. code-block:: python
>>> foo(x, z for z in range(10), t, w)
File "<stdin>", line 1
foo(x, z for z in range(10), t, w)
^
SyntaxError: Generator expression must be parenthesized
now Python 3.10 will display the exception as:
.. code-block:: python
>>> foo(x, z for z in range(10), t, w)
File "<stdin>", line 1
foo(x, z for z in range(10), t, w)
^^^^^^^^^^^^^^^^^^^^
SyntaxError: Generator expression must be parenthesized
This improvement has been contributed by Pablo Galindo in :issue:`43914`.
A considerable number of new specialized messages for :exc:`SyntaxError` exceptions
have been incorporated. Some of the most notable ones:
Expand Down
51 changes: 27 additions & 24 deletions Grammar/python.gram
Original file line number Diff line number Diff line change
Expand Up @@ -779,32 +779,32 @@ t_atom[expr_ty]:

# From here on, there are rules for invalid syntax with specialised error messages
invalid_arguments:
| args ',' '*' { RAISE_SYNTAX_ERROR("iterable argument unpacking follows keyword argument unpacking") }
| a=expression for_if_clauses ',' [args | expression for_if_clauses] {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "Generator expression must be parenthesized") }
| a=args ',' '*' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "iterable argument unpacking follows keyword argument unpacking") }
| a=expression b=for_if_clauses ',' [args | expression for_if_clauses] {
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, PyPegen_last_item(b, comprehension_ty)->target, "Generator expression must be parenthesized") }
| a=args for_if_clauses { _PyPegen_nonparen_genexp_in_call(p, a) }
| args ',' a=expression for_if_clauses {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "Generator expression must be parenthesized") }
| args ',' a=expression b=for_if_clauses {
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, asdl_seq_GET(b, b->size-1)->target, "Generator expression must be parenthesized") }
| a=args ',' args { _PyPegen_arguments_parsing_error(p, a) }
invalid_kwarg:
| expression a='=' {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
a, "expression cannot contain assignment, perhaps you meant \"==\"?") }
| a=expression b='=' {
RAISE_SYNTAX_ERROR_KNOWN_RANGE(
a, b, "expression cannot contain assignment, perhaps you meant \"==\"?") }

invalid_expression:
# !(NAME STRING) is not matched so we don't show this error with some invalid string prefixes like: kf"dsfsdf"
# Soft keywords need to also be ignored because they can be parsed as NAME NAME
| !(NAME STRING | SOFT_KEYWORD) a=disjunction expression {
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, "invalid syntax. Perhaps you forgot a comma?") }
| !(NAME STRING | SOFT_KEYWORD) a=disjunction b=expression {
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }

invalid_named_expression:
| a=expression ':=' expression {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) }
| a=NAME b='=' bitwise_or !('='|':='|',') {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
| a=NAME '=' b=bitwise_or !('='|':='|',') {
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
| !(list|tuple|genexp|'True'|'None'|'False') a=bitwise_or b='=' bitwise_or !('='|':='|',') {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(b, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
_PyPegen_get_expr_name(a)) }

invalid_assignment:
Expand Down Expand Up @@ -841,25 +841,28 @@ invalid_primary:
invalid_comprehension:
| ('[' | '(' | '{') a=starred_expression for_if_clauses {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "iterable unpacking cannot be used in comprehension") }
| ('[' | '{') a=star_named_expression ',' [star_named_expressions] for_if_clauses {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "did you forget parentheses around the comprehension target?") }
| ('[' | '{') a=star_named_expression ',' b=star_named_expressions for_if_clauses {
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, PyPegen_last_item(b, expr_ty),
"did you forget parentheses around the comprehension target?") }
| ('[' | '{') a=star_named_expression b=',' for_if_clauses {
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "did you forget parentheses around the comprehension target?") }
invalid_dict_comprehension:
| '{' a='**' bitwise_or for_if_clauses '}' {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "dict unpacking cannot be used in dict comprehension") }
invalid_parameters:
| param_no_default* invalid_parameters_helper param_no_default {
RAISE_SYNTAX_ERROR("non-default argument follows default argument") }
| param_no_default* invalid_parameters_helper a=param_no_default {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "non-default argument follows default argument") }
invalid_parameters_helper: # This is only there to avoid type errors
| a=slash_with_default { _PyPegen_singleton_seq(p, a) }
| param_with_default+
invalid_lambda_parameters:
| lambda_param_no_default* invalid_lambda_parameters_helper lambda_param_no_default {
RAISE_SYNTAX_ERROR("non-default argument follows default argument") }
| lambda_param_no_default* invalid_lambda_parameters_helper a=lambda_param_no_default {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "non-default argument follows default argument") }
invalid_lambda_parameters_helper:
| a=lambda_slash_with_default { _PyPegen_singleton_seq(p, a) }
| lambda_param_with_default+
invalid_star_etc:
| '*' (')' | ',' (')' | '**')) { RAISE_SYNTAX_ERROR("named arguments must follow bare *") }
| a='*' (')' | ',' (')' | '**')) { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "named arguments must follow bare *") }
| '*' ',' TYPE_COMMENT { RAISE_SYNTAX_ERROR("bare * has associated type comment") }
invalid_lambda_star_etc:
| '*' (':' | ',' (':' | '**')) { RAISE_SYNTAX_ERROR("named arguments must follow bare *") }
Expand Down Expand Up @@ -897,7 +900,7 @@ invalid_try_stmt:
RAISE_INDENTATION_ERROR("expected an indented block after 'try' statement on line %d", a->lineno) }
invalid_except_stmt:
| 'except' a=expression ',' expressions ['as' NAME ] ':' {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "exception group must be parenthesized") }
RAISE_SYNTAX_ERROR_STARTING_FROM(a, "exception group must be parenthesized") }
| a='except' expression ['as' NAME ] NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") }
| a='except' NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") }
invalid_finally_stmt:
Expand Down Expand Up @@ -942,10 +945,10 @@ invalid_class_def_raw:

invalid_double_starred_kvpairs:
| ','.double_starred_kvpair+ ',' invalid_kvpair
| expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot use a starred expression in a dictionary value") }
| expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
| expression a=':' &('}'|',') { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
invalid_kvpair:
| a=expression !(':') {
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, "':' expected after dictionary key") }
| expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot use a starred expression in a dictionary value") }
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, a->end_lineno, -1, "':' expected after dictionary key") }
| expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
| expression a=':' {RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
9 changes: 9 additions & 0 deletions Include/cpython/pyerrors.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ typedef struct {
PyObject *filename;
PyObject *lineno;
PyObject *offset;
PyObject *end_lineno;
PyObject *end_offset;
PyObject *text;
PyObject *print_file_and_line;
} PySyntaxErrorObject;
Expand Down Expand Up @@ -148,6 +150,13 @@ PyAPI_FUNC(void) PyErr_SyntaxLocationObject(
int lineno,
int col_offset);

PyAPI_FUNC(void) PyErr_RangedSyntaxLocationObject(
PyObject *filename,
int lineno,
int col_offset,
int end_lineno,
int end_col_offset);

PyAPI_FUNC(PyObject *) PyErr_ProgramTextObject(
PyObject *filename,
int lineno);
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_symtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ typedef struct _symtable_entry {
int ste_comp_iter_expr; /* non-zero if visiting a comprehension range expression */
int ste_lineno; /* first line of block */
int ste_col_offset; /* offset of first line of block */
int ste_end_lineno; /* end line of block */
int ste_end_col_offset; /* end offset of first line of block */
int ste_opt_lineno; /* lineno of last exec or import * */
int ste_opt_col_offset; /* offset of last exec or import * */
struct symtable *ste_table;
Expand Down
10 changes: 5 additions & 5 deletions Lib/test/test_cmd_line_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ def test_syntaxerror_unindented_caret_position(self):
exitcode, stdout, stderr = assert_python_failure(script_name)
text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
# Confirm that the caret is located under the '=' sign
self.assertIn("\n 1 + 1 = 2\n ^\n", text)
self.assertIn("\n ^^^^^\n", text)

def test_syntaxerror_indented_caret_position(self):
script = textwrap.dedent("""\
Expand All @@ -612,8 +612,8 @@ def test_syntaxerror_indented_caret_position(self):
script_name = _make_test_script(script_dir, 'script', script)
exitcode, stdout, stderr = assert_python_failure(script_name)
text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
# Confirm that the caret is located under the first 1 character
self.assertIn("\n 1 + 1 = 2\n ^\n", text)
# Confirm that the caret starts under the first 1 character
self.assertIn("\n 1 + 1 = 2\n ^^^^^\n", text)

# Try the same with a form feed at the start of the indented line
script = (
Expand All @@ -624,7 +624,7 @@ def test_syntaxerror_indented_caret_position(self):
exitcode, stdout, stderr = assert_python_failure(script_name)
text = io.TextIOWrapper(io.BytesIO(stderr), "ascii").read()
self.assertNotIn("\f", text)
self.assertIn("\n 1 + 1 = 2\n ^\n", text)
self.assertIn("\n 1 + 1 = 2\n ^^^^^\n", text)

def test_syntaxerror_multi_line_fstring(self):
script = 'foo = f"""{}\nfoo"""\n'
Expand All @@ -650,7 +650,7 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self):
self.assertEqual(
stderr.splitlines()[-3:],
[ b' foo = """\\q"""',
b' ^',
b' ^^^^^^^^',
b'SyntaxError: invalid escape sequence \\q'
],
)
Expand Down
Loading

0 comments on commit a77aac4

Please sign in to comment.