Skip to content

Commit

Permalink
bpo-45738: Fix computation of error location for invalid continuation characters in the parser (GH-29550)
Browse files Browse the repository at this point in the history
  • Loading branch information
pablogsal authored Nov 14, 2021
1 parent f8da00e commit 25835c5
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 12 deletions.
8 changes: 7 additions & 1 deletion Lib/test/test_syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -1505,7 +1505,13 @@ def func2():
def test_invalid_line_continuation_error_position(self):
self._check_error(r"a = 3 \ 4",
"unexpected character after line continuation character",
- lineno=1, offset=9)
+ lineno=1, offset=8)
+ self._check_error('1,\\#\n2',
+ "unexpected character after line continuation character",
+ lineno=1, offset=4)
+ self._check_error('\nfgdfgf\n1,\\#\n2\n',
+ "unexpected character after line continuation character",
+ lineno=3, offset=4)

def test_invalid_line_continuation_left_recursive(self):
# Check bpo-42218: SyntaxErrors following left-recursive rules
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix computation of error location for invalid continuation characters in the
parser. Patch by Pablo Galindo.
15 changes: 5 additions & 10 deletions Parser/pegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -351,22 +351,17 @@ tokenizer_error(Parser *p)
msg = "too many levels of indentation";
break;
case E_LINECONT: {
- char* loc = strrchr(p->tok->buf, '\n');
- const char* last_char = p->tok->cur - 1;
- if (loc != NULL && loc != last_char) {
- col_offset = p->tok->cur - loc - 1;
- p->tok->buf = loc;
- } else {
- col_offset = last_char - p->tok->buf - 1;
- }
+ col_offset = p->tok->cur - p->tok->buf - 1;
msg = "unexpected character after line continuation character";
break;
}
default:
msg = "unknown parsing error";
}

- RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, col_offset, p->tok->lineno, -1, msg);
+ RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
+ col_offset >= 0 ? col_offset : 0,
+ p->tok->lineno, -1, msg);
return -1;
}

Expand Down Expand Up @@ -497,7 +492,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
does not physically exist */
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);

- if (p->tok->lineno <= lineno) {
+ if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
Py_ssize_t size = p->tok->inp - p->tok->buf;
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
}
Expand Down
1 change: 0 additions & 1 deletion Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1970,7 +1970,6 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
if (c != '\n') {
tok->done = E_LINECONT;
- tok->cur = tok->inp;
return ERRORTOKEN;
}
c = tok_nextc(tok);
Expand Down

0 comments on commit 25835c5

Please sign in to comment.