Skip to content

Commit

Permalink
bpo-41076: Pre-feed the parser with the f-string expression location (p…
Browse files Browse the repository at this point in the history
…ythonGH-21054)

This commit changes the parsing of f-string expressions with the new parser. The parser gets pre-fed with the location of the expression itself (not the f-string, which was what we were doing before). This allows us to completely skip the shifting of the AST nodes after the parsing is completed.
  • Loading branch information
lysnikolaou authored Jun 27, 2020
1 parent 89e82c4 commit 1f0f4ab
Show file tree
Hide file tree
Showing 5 changed files with 2,426 additions and 2,642 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Pre-feed the parser with the location of the f-string expression, not the f-string itself, which allows us to skip the shifting of the AST node locations after the parsing is completed.
3 changes: 3 additions & 0 deletions Parser/pegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,9 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
}
}

if (p->start_rule == Py_fstring_input) {
col_offset -= p->starting_col_offset;
}
Py_ssize_t col_number = col_offset;

if (p->tok->encoding != NULL) {
Expand Down
264 changes: 22 additions & 242 deletions Parser/string_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -271,243 +271,14 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,

// FSTRING STUFF

// Forward declarations: shift_expr() and shift_arg() below call these
// functions before their definitions appear in the file.
static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset);
static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset);


/* Shift expression `n` (and, through fstring_shift_expr_locations, its
   children) by `line` lines and `col` columns. A NULL `n` is ignored.
   If `n` starts on a later line than `parent`, no column shift is applied
   (0 is passed instead of `col`). */
static inline void shift_expr(expr_ty parent, expr_ty n, int line, int col) {
    if (n != NULL) {
        int col_shift = (parent->lineno < n->lineno) ? 0 : col;
        fstring_shift_expr_locations(n, line, col_shift);
    }
}

/* Shift argument node `n` by `line` lines and `col` columns via
   fstring_shift_argument. If `n` starts on a later line than `parent`,
   no column shift is applied (0 is passed instead of `col`).
   Unlike shift_expr, `n` is dereferenced unconditionally, so it must not
   be NULL. */
static inline void shift_arg(expr_ty parent, arg_ty n, int line, int col) {
    int col_shift = (parent->lineno < n->lineno) ? 0 : col;
    fstring_shift_argument(parent, n, line, col_shift);
}

/* Apply shift_expr to every non-NULL expression stored in `seq`, using
   `parent` as the enclosing node for the same-line check. */
static void fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset) {
    Py_ssize_t count = asdl_seq_LEN(seq);
    for (Py_ssize_t idx = 0; idx < count; idx++) {
        expr_ty item = asdl_seq_GET(seq, idx);
        if (item != NULL) {
            shift_expr(parent, item, lineno, col_offset);
        }
    }
}

/* Shift the bounds of a subscript's slice expression.

   Only a Slice node is handled here: its `lower`, `upper` and `step`
   expressions are shifted by `lineno` lines and `col_offset` columns, with
   `parent` used for the same-line check performed by shift_expr().

   A Tuple slice is deliberately left untouched. The Subscript case of
   fstring_shift_children_locations() calls shift_expr() on the slice node
   right after calling this function, and that walk already shifts every
   element of the tuple. Shifting the elements here as well applied the
   offsets to them twice.

   NOTE(review): Slice nodes nested inside a tuple slice (e.g. a[1:2, 3])
   do not appear to get their bounds shifted by either walk, because
   fstring_shift_children_locations() has no Slice case -- confirm and
   handle there if needed. */
static void fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset) {
    if (slice->kind != Slice_kind) {
        return;
    }

    // shift_expr() ignores NULL, so absent bounds need no explicit check.
    shift_expr(parent, slice->v.Slice.lower, lineno, col_offset);
    shift_expr(parent, slice->v.Slice.upper, lineno, col_offset);
    shift_expr(parent, slice->v.Slice.step, lineno, col_offset);
}

/* Shift the pieces of one comprehension clause: its target, its iterable
   and every expression in its `ifs` sequence. `parent` is the enclosing
   node used for the same-line check. */
static void fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset) {
    expr_ty loop_target = comp->target;
    expr_ty iterable = comp->iter;

    shift_expr(parent, loop_target, lineno, col_offset);
    shift_expr(parent, iterable, lineno, col_offset);
    fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset);
}

/* Shift a single argument node: its annotation (if any) is shifted as an
   expression relative to `parent`, then the argument's own start/end line
   and column fields are moved by `lineno` and `col_offset`. */
static void fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset) {
    // shift_expr() is a no-op for a NULL annotation.
    shift_expr(parent, arg->annotation, lineno, col_offset);

    arg->col_offset += col_offset;
    arg->end_col_offset += col_offset;

    arg->lineno += lineno;
    arg->end_lineno += lineno;
}

/* Shift every component of an `arguments` node: positional-only arguments,
   regular arguments, *vararg, keyword-only arguments and their defaults,
   **kwarg, and the positional defaults. `parent` is the enclosing node
   used for the same-line check. */
static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) {
    Py_ssize_t count;

    count = asdl_seq_LEN(args->posonlyargs);
    for (Py_ssize_t idx = 0; idx < count; idx++) {
        arg_ty posonly = asdl_seq_GET(args->posonlyargs, idx);
        shift_arg(parent, posonly, lineno, col_offset);
    }

    count = asdl_seq_LEN(args->args);
    for (Py_ssize_t idx = 0; idx < count; idx++) {
        arg_ty regular = asdl_seq_GET(args->args, idx);
        shift_arg(parent, regular, lineno, col_offset);
    }

    // shift_arg() dereferences its argument, hence the explicit NULL checks
    // for the optional *vararg / **kwarg entries.
    if (args->vararg != NULL) {
        shift_arg(parent, args->vararg, lineno, col_offset);
    }

    count = asdl_seq_LEN(args->kwonlyargs);
    for (Py_ssize_t idx = 0; idx < count; idx++) {
        arg_ty kwonly = asdl_seq_GET(args->kwonlyargs, idx);
        shift_arg(parent, kwonly, lineno, col_offset);
    }

    fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset);

    if (args->kwarg != NULL) {
        shift_arg(parent, args->kwarg, lineno, col_offset);
    }

    fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset);
}

/* Shift the locations of the direct sub-expressions of `node` by `lineno`
   lines and `col_offset` columns. `node` itself is not modified here; it is
   passed as the parent to shift_expr() and the sequence/comprehension/
   argument helpers, which recurse into each child. Node kinds without a
   case below are left untouched. */
static void fstring_shift_children_locations(expr_ty node, int lineno, int col_offset) {
    Py_ssize_t count;

    switch (node->kind) {
    case BoolOp_kind:
        fstring_shift_seq_locations(node, node->v.BoolOp.values, lineno, col_offset);
        break;

    case NamedExpr_kind:
        shift_expr(node, node->v.NamedExpr.target, lineno, col_offset);
        shift_expr(node, node->v.NamedExpr.value, lineno, col_offset);
        break;

    case BinOp_kind:
        shift_expr(node, node->v.BinOp.left, lineno, col_offset);
        shift_expr(node, node->v.BinOp.right, lineno, col_offset);
        break;

    case UnaryOp_kind:
        shift_expr(node, node->v.UnaryOp.operand, lineno, col_offset);
        break;

    case Lambda_kind:
        fstring_shift_arguments(node, node->v.Lambda.args, lineno, col_offset);
        shift_expr(node, node->v.Lambda.body, lineno, col_offset);
        break;

    case IfExp_kind:
        shift_expr(node, node->v.IfExp.test, lineno, col_offset);
        shift_expr(node, node->v.IfExp.body, lineno, col_offset);
        shift_expr(node, node->v.IfExp.orelse, lineno, col_offset);
        break;

    case Dict_kind:
        fstring_shift_seq_locations(node, node->v.Dict.keys, lineno, col_offset);
        fstring_shift_seq_locations(node, node->v.Dict.values, lineno, col_offset);
        break;

    case Set_kind:
        fstring_shift_seq_locations(node, node->v.Set.elts, lineno, col_offset);
        break;

    /* The four comprehension-like kinds share one shape: shift the element
       expression(s), then each generator clause in order. */
    case ListComp_kind:
        shift_expr(node, node->v.ListComp.elt, lineno, col_offset);
        count = asdl_seq_LEN(node->v.ListComp.generators);
        for (Py_ssize_t idx = 0; idx < count; idx++) {
            comprehension_ty gen = asdl_seq_GET(node->v.ListComp.generators, idx);
            fstring_shift_comprehension(node, gen, lineno, col_offset);
        }
        break;

    case SetComp_kind:
        shift_expr(node, node->v.SetComp.elt, lineno, col_offset);
        count = asdl_seq_LEN(node->v.SetComp.generators);
        for (Py_ssize_t idx = 0; idx < count; idx++) {
            comprehension_ty gen = asdl_seq_GET(node->v.SetComp.generators, idx);
            fstring_shift_comprehension(node, gen, lineno, col_offset);
        }
        break;

    case DictComp_kind:
        shift_expr(node, node->v.DictComp.key, lineno, col_offset);
        shift_expr(node, node->v.DictComp.value, lineno, col_offset);
        count = asdl_seq_LEN(node->v.DictComp.generators);
        for (Py_ssize_t idx = 0; idx < count; idx++) {
            comprehension_ty gen = asdl_seq_GET(node->v.DictComp.generators, idx);
            fstring_shift_comprehension(node, gen, lineno, col_offset);
        }
        break;

    case GeneratorExp_kind:
        shift_expr(node, node->v.GeneratorExp.elt, lineno, col_offset);
        count = asdl_seq_LEN(node->v.GeneratorExp.generators);
        for (Py_ssize_t idx = 0; idx < count; idx++) {
            comprehension_ty gen = asdl_seq_GET(node->v.GeneratorExp.generators, idx);
            fstring_shift_comprehension(node, gen, lineno, col_offset);
        }
        break;

    case Await_kind:
        shift_expr(node, node->v.Await.value, lineno, col_offset);
        break;

    case Yield_kind:
        shift_expr(node, node->v.Yield.value, lineno, col_offset);
        break;

    case YieldFrom_kind:
        shift_expr(node, node->v.YieldFrom.value, lineno, col_offset);
        break;

    case Compare_kind:
        shift_expr(node, node->v.Compare.left, lineno, col_offset);
        fstring_shift_seq_locations(node, node->v.Compare.comparators, lineno, col_offset);
        break;

    case Call_kind:
        shift_expr(node, node->v.Call.func, lineno, col_offset);
        fstring_shift_seq_locations(node, node->v.Call.args, lineno, col_offset);
        // Only the value expression of each keyword is shifted here.
        count = asdl_seq_LEN(node->v.Call.keywords);
        for (Py_ssize_t idx = 0; idx < count; idx++) {
            keyword_ty kw = asdl_seq_GET(node->v.Call.keywords, idx);
            shift_expr(node, kw->value, lineno, col_offset);
        }
        break;

    case Attribute_kind:
        shift_expr(node, node->v.Attribute.value, lineno, col_offset);
        break;

    case Subscript_kind:
        shift_expr(node, node->v.Subscript.value, lineno, col_offset);
        // First the slice's inner parts, then the slice node itself.
        fstring_shift_slice_locations(node, node->v.Subscript.slice, lineno, col_offset);
        shift_expr(node, node->v.Subscript.slice, lineno, col_offset);
        break;

    case Starred_kind:
        shift_expr(node, node->v.Starred.value, lineno, col_offset);
        break;

    case List_kind:
        fstring_shift_seq_locations(node, node->v.List.elts, lineno, col_offset);
        break;

    case Tuple_kind:
        fstring_shift_seq_locations(node, node->v.Tuple.elts, lineno, col_offset);
        break;

    case JoinedStr_kind:
        fstring_shift_seq_locations(node, node->v.JoinedStr.values, lineno, col_offset);
        break;

    case FormattedValue_kind:
        shift_expr(node, node->v.FormattedValue.value, lineno, col_offset);
        // shift_expr() is a no-op when there is no format spec (NULL).
        shift_expr(node, node->v.FormattedValue.format_spec, lineno, col_offset);
        break;

    default:
        break;
    }
}

/* Move the already parsed expression `n`, together with all of its
   children, by `lineno` lines and `col_offset` columns (both are added to
   the existing locations). */
static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
{
    n->col_offset += col_offset;

    // Only a node that starts and ends on the same line gets its end column
    // moved. For a node spanning several lines (e.g. a Call whose closing
    // parenthesis is on a later line than its name) the end column is left
    // as it is.
    if (n->end_lineno == n->lineno) {
        n->end_col_offset += col_offset;
    }

    // Children are handled before n's own line numbers are updated:
    // shift_expr() compares the parent's lineno with the child's, so both
    // must still hold their unshifted values at that point.
    fstring_shift_children_locations(n, lineno, col_offset);

    n->lineno += lineno;
    n->end_lineno += lineno;
}

/* Fix locations for the given node and its children.
`parent` is the enclosing node.
`n` is the node whose locations are going to be fixed relative to parent.
`expr_str` is the child node's string representation, including braces.
*/
static void
fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols)
{
char *substr = NULL;
char *start;
Expand Down Expand Up @@ -552,7 +323,8 @@ fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
}
}
}
fstring_shift_expr_locations(n, lines, cols);
*p_lines = lines;
*p_cols = cols;
}


Expand Down Expand Up @@ -598,11 +370,26 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
return NULL;
}

str[0] = '(';
// The call to fstring_find_expr_location finds the column offset by which the
// generated AST nodes need to be shifted to the right, which is equal to the
// number of f-string characters before the expression starts. To compute this
// offset correctly, fstring_find_expr_location calls strstr, which only succeeds
// if curly braces appear before and after the f-string expression (exactly like
// they do in the f-string itself), hence the following lines.
str[0] = '{';
memcpy(str+1, expr_start, len);
str[len+1] = ')';
str[len+1] = '}';
str[len+2] = 0;

int lines, cols;
fstring_find_expr_location(t, str, &lines, &cols);

// The parentheses are needed in order to allow for leading whitespace within
// the f-string expression. This consequently gets parsed as a group (see the
// group rule in python.gram).
str[0] = '(';
str[len+1] = ')';

struct tok_state* tok = PyTokenizer_FromString(str, 1);
if (tok == NULL) {
PyMem_Free(str);
Expand All @@ -613,21 +400,14 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,

Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
NULL, p->arena);
p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
? p->starting_col_offset + t->col_offset : 0;
p2->starting_lineno = t->lineno + lines - 1;
p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno ? t->col_offset + cols : cols;

expr = _PyPegen_run_parser(p2);

if (expr == NULL) {
goto exit;
}

/* Reuse str to find the correct column offset. */
str[0] = '{';
str[len+1] = '}';
fstring_fix_expr_location(t, expr, str);

result = expr;

exit:
Expand Down
Loading

0 comments on commit 1f0f4ab

Please sign in to comment.