bpo-35975: Support parsing earlier minor versions of Python 3 (python…

…GH-12086) This adds a `feature_version` flag to `ast.parse()` (documented) and `compile()` (hidden) that allows tweaking the parser to support older versions of the grammar. In particular, if `feature_version` is 5 or 6, the hacks for the `async` and `await` keywords from PEP 492 are reinstated. (For 7 or higher, these are unconditionally treated as keywords, but they are still special tokens rather than `NAME` tokens that the parser driver recognizes.) https://bugs.python.org/issue35975
skrah · Mar 7, 2019 · 495da29 · 495da29
1 parent bf94cc7
commit 495da29
Show file tree

Hide file tree

Showing 29 changed files with 473 additions and 198 deletions.
diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst
@@ -126,7 +126,7 @@ The abstract grammar is currently defined as follows:
 Apart from the node classes, the :mod:`ast` module defines these utility functions
 and classes for traversing abstract syntax trees:
 
-.. function:: parse(source, filename='<unknown>', mode='exec', *, type_comments=False)
+.. function:: parse(source, filename='<unknown>', mode='exec', *, type_comments=False, feature_version=-1)
 
    Parse the source into an AST node.  Equivalent to ``compile(source,
    filename, mode, ast.PyCF_ONLY_AST)``.
@@ -145,13 +145,19 @@ and classes for traversing abstract syntax trees:
    modified to correspond to :pep:`484` "signature type comments",
    e.g. ``(str, int) -> List[str]``.
 
+   Also, setting ``feature_version`` to the minor version of an
+   earlier Python 3 version will attempt to parse using that version's
+   grammar.  For example, setting ``feature_version=4`` will allow
+   the use of ``async`` and ``await`` as variable names.  The lowest
+   supported value is 4; the highest is ``sys.version_info[1]``.
+
    .. warning::
       It is possible to crash the Python interpreter with a
       sufficiently large/complex string due to stack depth limitations
       in Python's AST compiler.
 
    .. versionchanged:: 3.8
-      Added ``type_comments=True`` and ``mode='func_type'``.
+      Added ``type_comments``, ``mode='func_type'`` and ``feature_version``.
 
 
 .. function:: literal_eval(node_or_string)

diff --git a/Doc/library/token-list.inc b/Doc/library/token-list.inc
diff --git a/Doc/library/token.rst b/Doc/library/token.rst
@@ -88,3 +88,6 @@ the :mod:`tokenize` module.
 
 .. versionchanged:: 3.8
    Added :data:`TYPE_COMMENT`.
+   Added :data:`AWAIT` and :data:`ASYNC` tokens back (they're needed
+   to support parsing older Python versions for :func:`ast.parse` with
+   ``feature_version`` set to 6 or lower).
diff --git a/Grammar/Grammar b/Grammar/Grammar
@@ -18,7 +18,7 @@ decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
 decorators: decorator+
 decorated: decorators (classdef | funcdef | async_funcdef)
 
-async_funcdef: 'async' funcdef
+async_funcdef: ASYNC funcdef
 funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] func_body_suite
 
 parameters: '(' [typedargslist] ')'
@@ -70,7 +70,7 @@ nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
 assert_stmt: 'assert' test [',' test]
 
 compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
-async_stmt: 'async' (funcdef | with_stmt | for_stmt)
+async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
 if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
 while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
 for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite]
@@ -106,7 +106,7 @@ arith_expr: term (('+'|'-') term)*
 term: factor (('*'|'@'|'/'|'%'|'//') factor)*
 factor: ('+'|'-'|'~') factor | power
 power: atom_expr ['**' factor]
-atom_expr: ['await'] atom trailer*
+atom_expr: [AWAIT] atom trailer*
 atom: ('(' [yield_expr|testlist_comp] ')' |
        '[' [testlist_comp] ']' |
        '{' [dictorsetmaker] '}' |
@@ -144,7 +144,7 @@ argument: ( test [comp_for] |
 
 comp_iter: comp_for | comp_if
 sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
-comp_for: ['async'] sync_comp_for
+comp_for: [ASYNC] sync_comp_for
 comp_if: 'if' test_nocond [comp_iter]
 
 # not used in grammar, but may appear in "node" passed from Parser to Compiler

diff --git a/Grammar/Tokens b/Grammar/Tokens
@@ -55,6 +55,8 @@ ELLIPSIS                '...'
 COLONEQUAL              ':='
 
 OP
+AWAIT
+ASYNC
 TYPE_IGNORE
 TYPE_COMMENT
 ERRORTOKEN

diff --git a/Include/Python-ast.h b/Include/Python-ast.h
diff --git a/Include/compile.h b/Include/compile.h
@@ -27,6 +27,7 @@ PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *);
 #ifndef Py_LIMITED_API
 typedef struct {
     int cf_flags;  /* bitmask of CO_xxx flags relevant to future */
+    int cf_feature_version;  /* minor Python version (PyCF_ONLY_AST) */
 } PyCompilerFlags;
 #endif
 

diff --git a/Include/parsetok.h b/Include/parsetok.h
@@ -35,6 +35,7 @@ typedef struct {
 #define PyPARSE_IGNORE_COOKIE 0x0010
 #define PyPARSE_BARRY_AS_BDFL 0x0020
 #define PyPARSE_TYPE_COMMENTS 0x0040
+#define PyPARSE_ASYNC_HACKS   0x0080
 
 PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int,
                                               perrdetail *);

diff --git a/Include/token.h b/Include/token.h
diff --git a/Lib/ast.py b/Lib/ast.py
@@ -27,7 +27,8 @@
 from _ast import *
 
 
-def parse(source, filename='<unknown>', mode='exec', *, type_comments=False):
+def parse(source, filename='<unknown>', mode='exec', *,
+          type_comments=False, feature_version=-1):
     """
     Parse the source into an AST node.
     Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).
@@ -36,7 +37,8 @@ def parse(source, filename='<unknown>', mode='exec', *, type_comments=False):
     flags = PyCF_ONLY_AST
     if type_comments:
         flags |= PyCF_TYPE_COMMENTS
-    return compile(source, filename, mode, flags)
+    return compile(source, filename, mode, flags,
+                   feature_version=feature_version)
 
 
 def literal_eval(node_or_string):

diff --git a/Lib/keyword.py b/Lib/keyword.py
@@ -20,8 +20,6 @@
         'and',
         'as',
         'assert',
-        'async',
-        'await',
         'break',
         'class',
         'continue',
@@ -52,6 +50,10 @@
 #--end keywords--
         ]
 
+kwlist.append('async')
+kwlist.append('await')
+kwlist.sort()
+
 iskeyword = frozenset(kwlist).__contains__
 
 def main():

diff --git a/Lib/test/test_parser.py b/Lib/test/test_parser.py
@@ -916,7 +916,7 @@ def XXXROUNDUP(n):
                 return (n + 3) & ~3
             return 1 << (n - 1).bit_length()
 
-        basesize = support.calcobjsize('Pii')
+        basesize = support.calcobjsize('Piii')
         nodesize = struct.calcsize('hP3iP0h2i')
         def sizeofchildren(node):
             if node is None:

diff --git a/Lib/test/test_type_comments.py b/Lib/test/test_type_comments.py
@@ -1,4 +1,5 @@
 import ast
+import sys
 import unittest
 
 
@@ -20,6 +21,29 @@ async def bar():  # type: () -> int
     return await bar()
 """
 
+asyncvar = """\
+async = 12
+await = 13
+"""
+
+asynccomp = """\
+async def foo(xs):
+    [x async for x in xs]
+"""
+
+matmul = """\
+a = b @ c
+"""
+
+fstring = """\
+a = 42
+f"{a}"
+"""
+
+underscorednumber = """\
+a = 42_42_42
+"""
+
 redundantdef = """\
 def foo():  # type: () -> int
     # type: () -> str
@@ -155,80 +179,117 @@ def favk(
 
 class TypeCommentTests(unittest.TestCase):
 
-    def parse(self, source):
-        return ast.parse(source, type_comments=True)
+    lowest = 4  # Lowest minor version supported
+    highest = sys.version_info[1]  # Highest minor version
+
+    def parse(self, source, feature_version=highest):
+        return ast.parse(source, type_comments=True,
+                         feature_version=feature_version)
+
+    def parse_all(self, source, minver=lowest, maxver=highest, expected_regex=""):
+        for feature_version in range(self.lowest, self.highest + 1):
+            if minver <= feature_version <= maxver:
+                try:
+                    yield self.parse(source, feature_version)
+                except SyntaxError as err:
+                    raise SyntaxError(str(err) + f" feature_version={feature_version}")
+            else:
+                with self.assertRaisesRegex(SyntaxError, expected_regex,
+                                            msg=f"feature_version={feature_version}"):
+                    self.parse(source, feature_version)
 
     def classic_parse(self, source):
         return ast.parse(source)
 
     def test_funcdef(self):
-        tree = self.parse(funcdef)
-        self.assertEqual(tree.body[0].type_comment, "() -> int")
-        self.assertEqual(tree.body[1].type_comment, "() -> None")
+        for tree in self.parse_all(funcdef):
+            self.assertEqual(tree.body[0].type_comment, "() -> int")
+            self.assertEqual(tree.body[1].type_comment, "() -> None")
         tree = self.classic_parse(funcdef)
         self.assertEqual(tree.body[0].type_comment, None)
         self.assertEqual(tree.body[1].type_comment, None)
 
     def test_asyncdef(self):
-        tree = self.parse(asyncdef)
-        self.assertEqual(tree.body[0].type_comment, "() -> int")
-        self.assertEqual(tree.body[1].type_comment, "() -> int")
+        for tree in self.parse_all(asyncdef, minver=5):
+            self.assertEqual(tree.body[0].type_comment, "() -> int")
+            self.assertEqual(tree.body[1].type_comment, "() -> int")
         tree = self.classic_parse(asyncdef)
         self.assertEqual(tree.body[0].type_comment, None)
         self.assertEqual(tree.body[1].type_comment, None)
 
+    def test_asyncvar(self):
+        for tree in self.parse_all(asyncvar, maxver=6):
+            pass
+
+    def test_asynccomp(self):
+        for tree in self.parse_all(asynccomp, minver=6):
+            pass
+
+    def test_matmul(self):
+        for tree in self.parse_all(matmul, minver=5):
+            pass
+
+    def test_fstring(self):
+        for tree in self.parse_all(fstring, minver=6):
+            pass
+
+    def test_underscorednumber(self):
+        for tree in self.parse_all(underscorednumber, minver=6):
+            pass
+
     def test_redundantdef(self):
-        with self.assertRaisesRegex(SyntaxError, "^Cannot have two type comments on def"):
-            tree = self.parse(redundantdef)
+        for tree in self.parse_all(redundantdef, maxver=0,
+                                expected_regex="^Cannot have two type comments on def"):
+            pass
 
     def test_nonasciidef(self):
-        tree = self.parse(nonasciidef)
-        self.assertEqual(tree.body[0].type_comment, "() -> àçčéñt")
+        for tree in self.parse_all(nonasciidef):
+            self.assertEqual(tree.body[0].type_comment, "() -> àçčéñt")
 
     def test_forstmt(self):
-        tree = self.parse(forstmt)
-        self.assertEqual(tree.body[0].type_comment, "int")
+        for tree in self.parse_all(forstmt):
+            self.assertEqual(tree.body[0].type_comment, "int")
         tree = self.classic_parse(forstmt)
         self.assertEqual(tree.body[0].type_comment, None)
 
     def test_withstmt(self):
-        tree = self.parse(withstmt)
-        self.assertEqual(tree.body[0].type_comment, "int")
+        for tree in self.parse_all(withstmt):
+            self.assertEqual(tree.body[0].type_comment, "int")
         tree = self.classic_parse(withstmt)
         self.assertEqual(tree.body[0].type_comment, None)
 
     def test_vardecl(self):
-        tree = self.parse(vardecl)
-        self.assertEqual(tree.body[0].type_comment, "int")
+        for tree in self.parse_all(vardecl):
+            self.assertEqual(tree.body[0].type_comment, "int")
         tree = self.classic_parse(vardecl)
         self.assertEqual(tree.body[0].type_comment, None)
 
     def test_ignores(self):
-        tree = self.parse(ignores)
-        self.assertEqual([ti.lineno for ti in tree.type_ignores], [2, 5])
+        for tree in self.parse_all(ignores):
+            self.assertEqual([ti.lineno for ti in tree.type_ignores], [2, 5])
         tree = self.classic_parse(ignores)
         self.assertEqual(tree.type_ignores, [])
 
     def test_longargs(self):
-        tree = self.parse(longargs)
-        for t in tree.body:
-            # The expected args are encoded in the function name
-            todo = set(t.name[1:])
-            self.assertEqual(len(t.args.args),
-                             len(todo) - bool(t.args.vararg) - bool(t.args.kwarg))
-            self.assertTrue(t.name.startswith('f'), t.name)
-            for c in t.name[1:]:
-                todo.remove(c)
-                if c == 'v':
-                    arg = t.args.vararg
-                elif c == 'k':
-                    arg = t.args.kwarg
-                else:
-                    assert 0 <= ord(c) - ord('a') < len(t.args.args)
-                    arg = t.args.args[ord(c) - ord('a')]
-                self.assertEqual(arg.arg, c)  # That's the argument name
-                self.assertEqual(arg.type_comment, arg.arg.upper())
-            assert not todo
+        for tree in self.parse_all(longargs):
+            for t in tree.body:
+                # The expected args are encoded in the function name
+                todo = set(t.name[1:])
+                self.assertEqual(len(t.args.args),
+                                 len(todo) - bool(t.args.vararg) - bool(t.args.kwarg))
+                self.assertTrue(t.name.startswith('f'), t.name)
+                for c in t.name[1:]:
+                    todo.remove(c)
+                    if c == 'v':
+                        arg = t.args.vararg
+                    elif c == 'k':
+                        arg = t.args.kwarg
+                    else:
+                        assert 0 <= ord(c) - ord('a') < len(t.args.args)
+                        arg = t.args.args[ord(c) - ord('a')]
+                    self.assertEqual(arg.arg, c)  # That's the argument name
+                    self.assertEqual(arg.type_comment, arg.arg.upper())
+                assert not todo
         tree = self.classic_parse(longargs)
         for t in tree.body:
             for arg in t.args.args + [t.args.vararg, t.args.kwarg]:
@@ -247,8 +308,8 @@ def test_inappropriate_type_comments(self):
 
         def check_both_ways(source):
             ast.parse(source, type_comments=False)
-            with self.assertRaises(SyntaxError):
-                ast.parse(source, type_comments=True)
+            for tree in self.parse_all(source, maxver=0):
+                pass
 
         check_both_ways("pass  # type: int\n")
         check_both_ways("foo()  # type: int\n")