gh-109596: Ensure repeated rules in the grammar are not allowed and fix incorrect soft keywords (#109606)

2024-11-23 01:45:25 +08:00 · 2023-09-22 19:03:23 +01:00 · 2023-09-22 19:03:23 +01:00 · b28ffaa193
commit b28ffaa193
parent 7c55399172
7 changed files with 1781 additions and 1797 deletions
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@ -19,8 +19,6 @@ _PyPegen_parse(Parser *p)
        result = eval_rule(p);
    } else if (p->start_rule == Py_func_type_input) {
        result = func_type_rule(p);
-    } else if (p->start_rule == Py_fstring_input) {
-        result = fstring_rule(p);
    }

    return result;
@ -89,7 +87,6 @@ file[mod_ty]: a=[statements] ENDMARKER { _PyPegen_make_module(p, a) }
 interactive[mod_ty]: a=statement_newline { _PyAST_Interactive(a, p->arena) }
 eval[mod_ty]: a=expressions NEWLINE* ENDMARKER { _PyAST_Expression(a, p->arena) }
 func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMARKER { _PyAST_FunctionType(a, b, p->arena) }
-fstring[expr_ty]: star_expressions

 # GENERAL STATEMENTS
 # ==================
@ -647,20 +644,20 @@ type_param_seq[asdl_type_param_seq*]: a[asdl_type_param_seq*]=','.type_param+ ['

 type_param[type_param_ty] (memo):
    | a=NAME b=[type_param_bound] { _PyAST_TypeVar(a->v.Name.id, b, EXTRA) }
-    | '*' a=NAME colon=":" e=expression {
+    | '*' a=NAME colon=':' e=expression {
            RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
                ? "cannot use constraints with TypeVarTuple"
                : "cannot use bound with TypeVarTuple")
        }
    | '*' a=NAME { _PyAST_TypeVarTuple(a->v.Name.id, EXTRA) }
-    | '**' a=NAME colon=":" e=expression {
+    | '**' a=NAME colon=':' e=expression {
            RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
                ? "cannot use constraints with ParamSpec"
                : "cannot use bound with ParamSpec")
        }
    | '**' a=NAME { _PyAST_ParamSpec(a->v.Name.id, EXTRA) }

-type_param_bound[expr_ty]: ":" e=expression { e }
+type_param_bound[expr_ty]: ':' e=expression { e }

 # EXPRESSIONS
 # -----------
@ -915,7 +912,7 @@ fstring_middle[expr_ty]:
    | fstring_replacement_field
    | t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
 fstring_replacement_field[expr_ty]:
-    | '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
+    | '{' a=(yield_expr | star_expressions) debug_expr='='? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
        _PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
    | invalid_replacement_field
 fstring_conversion[ResultTokenWithMetadata*]:
--- a/Include/compile.h
+++ b/Include/compile.h
@ -10,9 +10,6 @@ extern "C" {
 #define Py_eval_input 258
 #define Py_func_type_input 345

-/* This doesn't need to match anything */
-#define Py_fstring_input 800
-
 #ifndef Py_LIMITED_API
 #  define Py_CPYTHON_COMPILE_H
 #  include "cpython/compile.h"
--- a/Lib/test/test_peg_generator/test_pegen.py
+++ b/Lib/test/test_peg_generator/test_pegen.py
@ -42,6 +42,15 @@ class TestPegen(unittest.TestCase):
        )
        self.assertEqual(repr(rules["term"]), expected_repr)

+    def test_repeated_rules(self) -> None:  # a grammar defining the same rule name twice must be rejected
+        grammar_source = """
+        start: the_rule NEWLINE
+        the_rule: 'b' NEWLINE
+        the_rule: 'a' NEWLINE
+        """
+        with self.assertRaisesRegex(GrammarError, "Repeated rule 'the_rule'"):
+            parse_string(grammar_source, GrammarParser)
+
    def test_long_rule_str(self) -> None:
        grammar_source = """
        start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
--- a/Builtins/2023-09-20-13-18-08.gh-issue-109596.RG0K2G.rst
+++ b/Builtins/2023-09-20-13-18-08.gh-issue-109596.RG0K2G.rst
@ -0,0 +1,3 @@
+Fix some tokens in the grammar that were incorrectly marked as soft
+keywords. Also fix some repeated rule names and ensure that repeated rules
+are not allowed. Patch by Pablo Galindo.
--- a/Parser/parser.c
+++ b/Parser/parser.c
--- a/Parser/pegen_errors.c
+++ b/Parser/pegen_errors.c
@ -310,21 +310,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
        end_col_offset = p->tok->cur - p->tok->line_start;
    }

-    if (p->start_rule == Py_fstring_input) {
-        const char *fstring_msg = "f-string: ";
-        Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
-
-        char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
-        if (!new_errmsg) {
-            return (void *) PyErr_NoMemory();
-        }
-
-        // Copy both strings into new buffer
-        memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
-        memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
-        new_errmsg[len] = 0;
-        errmsg = new_errmsg;
-    }
    errstr = PyUnicode_FromFormatV(errmsg, va);
    if (!errstr) {
        goto error;
@ -363,11 +348,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
        }
    }

-    if (p->start_rule == Py_fstring_input) {
-        col_offset -= p->starting_col_offset;
-        end_col_offset -= p->starting_col_offset;
-    }
-
    Py_ssize_t col_number = col_offset;
    Py_ssize_t end_col_number = end_col_offset;

@ -398,17 +378,11 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,

    Py_DECREF(errstr);
    Py_DECREF(value);
-    if (p->start_rule == Py_fstring_input) {
-        PyMem_Free((void *)errmsg);
-    }
    return NULL;

 error:
    Py_XDECREF(errstr);
    Py_XDECREF(error_line);
-    if (p->start_rule == Py_fstring_input) {
-        PyMem_Free((void *)errmsg);
-    }
    return NULL;
 }

--- a/Tools/peg_generator/pegen/grammar.py
+++ b/Tools/peg_generator/pegen/grammar.py
@ -35,7 +35,13 @@ class GrammarVisitor:

 class Grammar:
    def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
-        self.rules = {rule.name: rule for rule in rules}
+        # Reject duplicate rule names: a plain dict build would let a later rule silently replace an earlier one
+        all_rules = {}
+        for rule in rules:
+            if rule.name in all_rules:
+                raise GrammarError(f"Repeated rule {rule.name!r}")
+            all_rules[rule.name] = rule
+        self.rules = all_rules
         self.metas = dict(metas)

    def __str__(self) -> str: