Skip to content

Commit

Permalink
[3.9] bpo-44885: Correct the ast locations of f-strings with format s…
Browse files Browse the repository at this point in the history
…pecs and repeated expressions (pythonGH-27729).

(cherry picked from commit 8e832fb)

Co-authored-by: Pablo Galindo Salgado <[email protected]>
  • Loading branch information
pablogsal committed Aug 12, 2021
1 parent f7635f0 commit b01000e
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 55 deletions.
40 changes: 29 additions & 11 deletions Lib/test/test_fstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,6 @@ def test_ast_line_numbers_nested(self):
self.assertEqual(call.col_offset, 11)

def test_ast_line_numbers_duplicate_expression(self):
"""Duplicate expression
NOTE: this is currently broken, always sets location of the first
expression.
"""
expr = """
a = 10
f'{a * x()} {a * x()} {a * x()}'
Expand Down Expand Up @@ -266,9 +261,9 @@ def test_ast_line_numbers_duplicate_expression(self):
self.assertEqual(binop.lineno, 3)
self.assertEqual(binop.left.lineno, 3)
self.assertEqual(binop.right.lineno, 3)
self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong
self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong
self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
self.assertEqual(binop.col_offset, 13)
self.assertEqual(binop.left.col_offset, 13)
self.assertEqual(binop.right.col_offset, 17)
# check the third binop location
binop = t.body[1].value.values[4].value
self.assertEqual(type(binop), ast.BinOp)
Expand All @@ -278,9 +273,32 @@ def test_ast_line_numbers_duplicate_expression(self):
self.assertEqual(binop.lineno, 3)
self.assertEqual(binop.left.lineno, 3)
self.assertEqual(binop.right.lineno, 3)
self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong
self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong
self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
self.assertEqual(binop.col_offset, 23)
self.assertEqual(binop.left.col_offset, 23)
self.assertEqual(binop.right.col_offset, 27)

def test_ast_numbers_fstring_with_formatting(self):

t = ast.parse('f"Here is that pesky {xxx:.3f} again"')
self.assertEqual(len(t.body), 1)
self.assertEqual(t.body[0].lineno, 1)

self.assertEqual(type(t.body[0]), ast.Expr)
self.assertEqual(type(t.body[0].value), ast.JoinedStr)
self.assertEqual(len(t.body[0].value.values), 3)

self.assertEqual(type(t.body[0].value.values[0]), ast.Constant)
self.assertEqual(type(t.body[0].value.values[1]), ast.FormattedValue)
self.assertEqual(type(t.body[0].value.values[2]), ast.Constant)

_, expr, _ = t.body[0].value.values

name = expr.value
self.assertEqual(type(name), ast.Name)
self.assertEqual(name.lineno, 1)
self.assertEqual(name.end_lineno, 1)
self.assertEqual(name.col_offset, 22)
self.assertEqual(name.end_col_offset, 25)

def test_ast_line_numbers_multiline_fstring(self):
# See bpo-30465 for details.
Expand Down
5 changes: 0 additions & 5 deletions Lib/test/test_peg_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,11 +231,6 @@ def f() -> Any:
('f-string_doublestarred', "f'{ {**x} }'"),
('f-string_escape_brace', "f'{{Escape'"),
('f-string_escape_closing_brace', "f'Escape}}'"),
('f-string_repr', "f'{a!r}'"),
('f-string_str', "f'{a!s}'"),
('f-string_ascii', "f'{a!a}'"),
('f-string_debug', "f'{a=}'"),
('f-string_padding', "f'{a:03d}'"),
('f-string_multiline',
"""
f'''
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Correct the ast locations of f-strings with format specs and repeated
expressions. Patch by Pablo Galindo
70 changes: 31 additions & 39 deletions Parser/pegen/parse_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -284,49 +284,48 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
/* Fix locations for the given node and its children.
`parent` is the enclosing node.
`expr_start` is the starting position of the expression (pointing to the open brace).
`n` is the node which locations are going to be fixed relative to parent.
`expr_str` is the child node's string representation, including braces.
*/
static bool
fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols)
fstring_find_expr_location(Token *parent, const char* expr_start, char *expr_str, int *p_lines, int *p_cols)
{
*p_lines = 0;
*p_cols = 0;
assert(expr_start != NULL && *expr_start == '{');
if (parent && parent->bytes) {
char *parent_str = PyBytes_AsString(parent->bytes);
if (!parent_str) {
return false;
}
char *substr = strstr(parent_str, expr_str);
if (substr) {
// The following is needed, in order to correctly shift the column
// offset, in the case that (disregarding any whitespace) a newline
// immediately follows the opening curly brace of the fstring expression.
bool newline_after_brace = 1;
char *start = substr + 1;
while (start && *start != '}' && *start != '\n') {
if (*start != ' ' && *start != '\t' && *start != '\f') {
newline_after_brace = 0;
break;
}
start++;
// The following is needed, in order to correctly shift the column
// offset, in the case that (disregarding any whitespace) a newline
// immediately follows the opening curly brace of the fstring expression.
bool newline_after_brace = 1;
const char *start = expr_start + 1;
while (start && *start != '}' && *start != '\n') {
if (*start != ' ' && *start != '\t' && *start != '\f') {
newline_after_brace = 0;
break;
}
start++;
}

// Account for the characters from the last newline character to our
// left until the beginning of substr.
if (!newline_after_brace) {
start = substr;
while (start > parent_str && *start != '\n') {
start--;
}
*p_cols += (int)(substr - start);
// Account for the characters from the last newline character to our
// left until the beginning of expr_start.
if (!newline_after_brace) {
start = expr_start;
while (start > parent_str && *start != '\n') {
start--;
}
/* adjust the start based on the number of newlines encountered
before the f-string expression */
for (char* p = parent_str; p < substr; p++) {
if (*p == '\n') {
(*p_lines)++;
}
*p_cols += (int)(expr_start - start);
}
/* adjust the start based on the number of newlines encountered
before the f-string expression */
for (const char *p = parent_str; p < expr_start; p++) {
if (*p == '\n') {
(*p_lines)++;
}
}
}
Expand Down Expand Up @@ -370,26 +369,19 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,

len = expr_end - expr_start;
/* Allocate 3 extra bytes: open paren, close paren, null byte. */
str = PyMem_Malloc(len + 3);
str = PyMem_Calloc(len + 3, sizeof(char));
if (str == NULL) {
PyErr_NoMemory();
return NULL;
}

// The call to fstring_find_expr_location is responsible for finding the column offset
// the generated AST nodes need to be shifted to the right, which is equal to the number
// of the f-string characters before the expression starts. In order to correctly compute
// this offset, strstr gets called in fstring_find_expr_location which only succeeds
// if curly braces appear before and after the f-string expression (exactly like they do
// in the f-string itself), hence the following lines.
str[0] = '{';
// of the f-string characters before the expression starts.
memcpy(str+1, expr_start, len);
str[len+1] = '}';
str[len+2] = 0;

int lines, cols;
if (!fstring_find_expr_location(t, str, &lines, &cols)) {
PyMem_FREE(str);
if (!fstring_find_expr_location(t, expr_start-1, str+1, &lines, &cols)) {
PyMem_Free(str);
return NULL;
}

Expand Down

0 comments on commit b01000e

Please sign in to comment.