From 25835c518aa7446f3680b62c1fb43827e0f190d9 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 14 Nov 2021 01:06:41 +0000 Subject: [PATCH] bpo-45738: Fix computation of error location for invalid continuation (GH-29550) characters in the parser --- Lib/test/test_syntax.py | 8 +++++++- .../2021-11-14-00-14-45.bpo-45738.e0cgKd.rst | 2 ++ Parser/pegen.c | 15 +++++---------- Parser/tokenizer.c | 1 - 4 files changed, 14 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 65d18e4941be58..f41df8ca49aa6f 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -1505,7 +1505,13 @@ def func2(): def test_invalid_line_continuation_error_position(self): self._check_error(r"a = 3 \ 4", "unexpected character after line continuation character", - lineno=1, offset=9) + lineno=1, offset=8) + self._check_error('1,\\#\n2', + "unexpected character after line continuation character", + lineno=1, offset=4) + self._check_error('\nfgdfgf\n1,\\#\n2\n', + "unexpected character after line continuation character", + lineno=3, offset=4) def test_invalid_line_continuation_left_recursive(self): # Check bpo-42218: SyntaxErrors following left-recursive rules diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst b/Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst new file mode 100644 index 00000000000000..b238034323c77f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst @@ -0,0 +1,2 @@ +Fix computation of error location for invalid continuation characters in the +parser. Patch by Pablo Galindo. diff --git a/Parser/pegen.c b/Parser/pegen.c index b00eff3432decf..8a3f740c359c0c 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -351,14 +351,7 @@ tokenizer_error(Parser *p) msg = "too many levels of indentation"; break; case E_LINECONT: { - char* loc = strrchr(p->tok->buf, '\n'); - const char* last_char = p->tok->cur - 1; - if (loc != NULL && loc != last_char) { - col_offset = p->tok->cur - loc - 1; - p->tok->buf = loc; - } else { - col_offset = last_char - p->tok->buf - 1; - } + col_offset = p->tok->cur - p->tok->buf - 1; msg = "unexpected character after line continuation character"; break; } @@ -366,7 +359,9 @@ tokenizer_error(Parser *p) msg = "unknown parsing error"; } - RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, col_offset, p->tok->lineno, -1, msg); + RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, + col_offset >= 0 ? col_offset : 0, + p->tok->lineno, -1, msg); return -1; } @@ -497,7 +492,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, does not physically exist */ assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec); - if (p->tok->lineno <= lineno) { + if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) { Py_ssize_t size = p->tok->inp - p->tok->buf; error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace"); } diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 8a19458ec72f46..f281c423d0e0c6 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1970,7 +1970,6 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) c = tok_nextc(tok); if (c != '\n') { tok->done = E_LINECONT; - tok->cur = tok->inp; return ERRORTOKEN; } c = tok_nextc(tok);