Skip to content

Commit

Permalink
bpo-40688: Use the correct parser in the peg_generator scripts (pytho…
Browse files Browse the repository at this point in the history
…nGH-20235)

The scripts in `Tools/peg_generator/scripts` mostly assume that
`ast.parse` and `compile` use the old parser, since this was the
state of things while we were developing them. They need to be
updated to always use the correct parser. `_peg_parser` is being
extended to support both parsing and compiling with both parsers.
  • Loading branch information
lysnikolaou authored May 25, 2020
1 parent 4483253 commit 9645930
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 172 deletions.
133 changes: 81 additions & 52 deletions Modules/_peg_parser.c
Original file line number Diff line number Diff line change
@@ -1,104 +1,133 @@
#include <Python.h>
#include "pegen_interface.h"

PyObject *
_Py_parse_file(PyObject *self, PyObject *args, PyObject *kwds)
static int
_mode_str_to_int(char *mode_str)
{
static char *keywords[] = {"file", "mode", NULL};
char *filename;
char *mode_str = "exec";

if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", keywords, &filename, &mode_str)) {
return NULL;
}

int mode;
if (strcmp(mode_str, "exec") == 0) {
mode = Py_file_input;
}
else if (strcmp(mode_str, "eval") == 0) {
mode = Py_eval_input;
}
else if (strcmp(mode_str, "single") == 0) {
mode = Py_single_input;
}
else {
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'single'");
mode = -1;
}
return mode;
}

PyArena *arena = PyArena_New();
if (arena == NULL) {
/* Parse `str` with one of the two parsers, forwarding `filename`, `mode`,
   `flags` and `arena` unchanged.  A non-zero `oldparser` selects
   PyParser_ASTFromString; otherwise PyPegen_ASTFromString is used.
   The selected parser's result is returned as-is (callers treat NULL as
   failure). */
static mod_ty
_run_parser(char *str, char *filename, int mode, PyCompilerFlags *flags, PyArena *arena, int oldparser)
{
    if (oldparser) {
        return PyParser_ASTFromString(str, filename, mode, flags, arena);
    }
    return PyPegen_ASTFromString(str, filename, mode, flags, arena);
}

PyObject *
_Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds)
{
static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
char *the_string;
char *filename = "<string>";
char *mode_str = "exec";
int oldparser = 0;

if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
&the_string, &filename, &mode_str, &oldparser)) {
return NULL;
}

int mode = _mode_str_to_int(mode_str);
if (mode == -1) {
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
}

PyCompilerFlags flags = _PyCompilerFlags_INIT;
PyObject *result = NULL;
flags.cf_flags = PyCF_IGNORE_COOKIE;

mod_ty res = PyPegen_ASTFromFilename(filename, mode, &flags, arena);
if (res == NULL) {
goto error;
PyArena *arena = PyArena_New();
if (arena == NULL) {
return NULL;
}

mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser);
if (mod == NULL) {
PyArena_Free(arena);
return NULL;
}
result = PyAST_mod2obj(res);

error:
PyObject *filename_ob = PyUnicode_DecodeFSDefault(filename);
if (filename_ob == NULL) {
PyArena_Free(arena);
return NULL;
}
PyCodeObject *result = PyAST_CompileObject(mod, filename_ob, &flags, -1, arena);
Py_XDECREF(filename_ob);
PyArena_Free(arena);
return result;
return (PyObject *)result;
}

PyObject *
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
{
static char *keywords[] = {"string", "mode", "oldparser", NULL};
static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
char *the_string;
char *filename = "<string>";
char *mode_str = "exec";
int oldparser = 0;

if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sp", keywords,
&the_string, &mode_str, &oldparser)) {
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
&the_string, &filename, &mode_str, &oldparser)) {
return NULL;
}

int mode;
if (strcmp(mode_str, "exec") == 0) {
mode = Py_file_input;
}
else if (strcmp(mode_str, "eval") == 0) {
mode = Py_eval_input;
}
else if (strcmp(mode_str, "single") == 0) {
mode = Py_single_input;
}
else {
int mode = _mode_str_to_int(mode_str);
if (mode == -1) {
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
}

PyCompilerFlags flags = _PyCompilerFlags_INIT;
flags.cf_flags = PyCF_IGNORE_COOKIE;

PyArena *arena = PyArena_New();
if (arena == NULL) {
return NULL;
}

PyObject *result = NULL;

PyCompilerFlags flags = _PyCompilerFlags_INIT;
flags.cf_flags = PyCF_IGNORE_COOKIE;

mod_ty res;
if (oldparser) {
res = PyParser_ASTFromString(the_string, "<string>", mode, &flags, arena);
}
else {
res = PyPegen_ASTFromString(the_string, "<string>", mode, &flags, arena);
}
if (res == NULL) {
goto error;
mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser);
if (mod == NULL) {
PyArena_Free(arena);
return NULL;
}
result = PyAST_mod2obj(res);

error:
PyObject *result = PyAST_mod2obj(mod);
PyArena_Free(arena);
return result;
}

static PyMethodDef ParseMethods[] = {
{"parse_file", (PyCFunction)(void (*)(void))_Py_parse_file, METH_VARARGS|METH_KEYWORDS, "Parse a file."},
{"parse_string", (PyCFunction)(void (*)(void))_Py_parse_string, METH_VARARGS|METH_KEYWORDS,"Parse a string."},
{
"parse_string",
(PyCFunction)(void (*)(void))_Py_parse_string,
METH_VARARGS|METH_KEYWORDS,
"Parse a string, return an AST."
},
{
"compile_string",
(PyCFunction)(void (*)(void))_Py_compile_string,
METH_VARARGS|METH_KEYWORDS,
"Compile a string, return a code object."
},
{NULL, NULL, 0, NULL} /* Sentinel */
};

Expand Down
24 changes: 9 additions & 15 deletions Tools/peg_generator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -69,25 +69,22 @@ stats: peg_extension/parse.c data/xxl.py

time: time_compile

time_compile: venv peg_extension/parse.c data/xxl.py
time_compile: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile

time_parse: venv peg_extension/parse.c data/xxl.py
time_parse: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse

time_check: venv peg_extension/parse.c data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl check
time_old: time_old_compile

time_stdlib: time_stdlib_compile

time_stdlib_compile: venv peg_extension/parse.c data/xxl.py
time_old_compile: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile

time_stdlib_parse: venv peg_extension/parse.c data/xxl.py
time_old_parse: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse

test_local:
$(PYTHON) scripts/test_parse_directory.py \
time_peg_dir: venv
$(VENVPYTHON) scripts/test_parse_directory.py \
--grammar-file $(GRAMMAR) \
--tokens-file $(TOKENS) \
-d $(TESTDIR) \
Expand All @@ -96,8 +93,8 @@ test_local:
--exclude "*/failset/**" \
--exclude "*/failset/**/*"

test_global: $(CPYTHON)
$(PYTHON) scripts/test_parse_directory.py \
time_stdlib: $(CPYTHON) venv
$(VENVPYTHON) scripts/test_parse_directory.py \
--grammar-file $(GRAMMAR) \
--tokens-file $(TOKENS) \
-d $(CPYTHON) \
Expand All @@ -113,9 +110,6 @@ mypy: regen-metaparser
format-python:
black pegen scripts

bench: venv
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=stdlib check

format: format-python

find_max_nesting:
Expand Down
41 changes: 11 additions & 30 deletions Tools/peg_generator/scripts/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import os
from time import time

import _peg_parser

try:
import memory_profiler
except ModuleNotFoundError:
Expand All @@ -14,8 +16,6 @@
sys.exit(1)

sys.path.insert(0, os.getcwd())
from peg_extension import parse
from pegen.build import build_c_parser_and_generator
from scripts.test_parse_directory import parse_directory

argparser = argparse.ArgumentParser(
Expand All @@ -41,9 +41,6 @@
"compile", help="Benchmark parsing and compiling to bytecode"
)
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
command_check = subcommands.add_parser(
"check", help="Benchmark parsing and throwing the tree away"
)


def benchmark(func):
Expand All @@ -66,55 +63,41 @@ def wrapper(*args):
@benchmark
def time_compile(source, parser):
if parser == "cpython":
return compile(source, os.path.join("data", "xxl.py"), "exec")
return _peg_parser.compile_string(
source,
oldparser=True,
)
else:
return parse.parse_string(source, mode=2)
return _peg_parser.compile_string(source)


@benchmark
def time_parse(source, parser):
if parser == "cpython":
return ast.parse(source, os.path.join("data", "xxl.py"), "exec")
return _peg_parser.parse_string(source, oldparser=True)
else:
return parse.parse_string(source, mode=1)


@benchmark
def time_check(source):
return parse.parse_string(source, mode=0)
return _peg_parser.parse_string(source)


def run_benchmark_xxl(subcommand, parser, source):
if subcommand == "compile":
time_compile(source, parser)
elif subcommand == "parse":
time_parse(source, parser)
elif subcommand == "check":
time_check(source)


def run_benchmark_stdlib(subcommand, parser):
modes = {"compile": 2, "parse": 1, "check": 0}
extension = None
if parser == "pegen":
extension = build_c_parser_and_generator(
"../../Grammar/python.gram",
"../../Grammar/Tokens",
"peg_extension/parse.c",
compile_extension=True,
skip_actions=False,
)
for _ in range(3):
parse_directory(
"../../Lib",
"../../Grammar/python.gram",
"../../Grammar/Tokens",
verbose=False,
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
skip_actions=False,
tree_arg=0,
short=True,
extension=extension,
mode=modes[subcommand],
mode=2 if subcommand == "compile" else 1,
parser=parser,
)

Expand All @@ -127,8 +110,6 @@ def main():

if subcommand is None:
argparser.error("A benchmark to run is required")
if subcommand == "check" and parser == "cpython":
argparser.error("Cannot use check target with the CPython parser")

if target == "xxl":
with open(os.path.join("data", "xxl.py"), "r") as f:
Expand Down
18 changes: 7 additions & 11 deletions Tools/peg_generator/scripts/show_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import sys
import tempfile

import _peg_parser

from typing import List

sys.path.insert(0, os.getcwd())
Expand Down Expand Up @@ -72,7 +74,7 @@ def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]:


def show_parse(source: str, verbose: bool = False) -> str:
tree = ast.parse(source)
tree = _peg_parser.parse_string(source, oldparser=True)
return format_tree(tree, verbose).rstrip("\n")


Expand All @@ -90,17 +92,11 @@ def main() -> None:
sep = " "
program = sep.join(args.program)
if args.grammar_file:
sys.path.insert(0, os.curdir)
from pegen.build import build_parser_and_generator

build_parser_and_generator(args.grammar_file, "peg_parser/parse.c", compile_extension=True)
from pegen.parse import parse_string # type: ignore[import]

tree = parse_string(program, mode=1)
tree = _peg_parser.parse_string(program)

if args.diff:
a = tree
b = ast.parse(program)
b = _peg_parser.parse_string(program, oldparser=True)
diff = diff_trees(a, b, args.verbose)
if diff:
for line in diff:
Expand All @@ -111,8 +107,8 @@ def main() -> None:
print(f"# Parsed using {args.grammar_file}")
print(format_tree(tree, args.verbose))
else:
tree = ast.parse(program)
print("# Parse using ast.parse()")
tree = _peg_parser.parse_string(program, oldparser=True)
print("# Parse using the old parser")
print(format_tree(tree, args.verbose))


Expand Down
Loading

0 comments on commit 9645930

Please sign in to comment.