mirror of
https://github.com/python/cpython.git
synced 2024-11-27 03:45:08 +08:00
Update pegen to use the latest upstream developments (GH-27586)
This commit is contained in:
parent
8e832fb2a2
commit
953d27261e
@ -11,8 +11,8 @@ from test import support
|
||||
from test.support import os_helper
|
||||
from test.support.script_helper import assert_python_ok
|
||||
|
||||
_py_cflags_nodist = sysconfig.get_config_var('PY_CFLAGS_NODIST')
|
||||
_pgo_flag = sysconfig.get_config_var('PGO_PROF_USE_FLAG')
|
||||
_py_cflags_nodist = sysconfig.get_config_var("PY_CFLAGS_NODIST")
|
||||
_pgo_flag = sysconfig.get_config_var("PGO_PROF_USE_FLAG")
|
||||
if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist:
|
||||
raise unittest.SkipTest("peg_generator test disabled under PGO build")
|
||||
|
||||
@ -458,3 +458,28 @@ class TestCParser(unittest.TestCase):
|
||||
self.check_input_strings_for_grammar(valid_cases, invalid_cases)
|
||||
"""
|
||||
self.run_test(grammar_source, test_source)
|
||||
|
||||
def test_forced(self) -> None:
|
||||
grammar_source = """
|
||||
start: NAME &&':' | NAME
|
||||
"""
|
||||
test_source = """
|
||||
self.assertEqual(parse.parse_string("number :", mode=0), None)
|
||||
with self.assertRaises(SyntaxError) as e:
|
||||
parse.parse_string("a", mode=0)
|
||||
self.assertIn("expected ':'", str(e.exception))
|
||||
"""
|
||||
self.run_test(grammar_source, test_source)
|
||||
|
||||
def test_forced_with_group(self) -> None:
|
||||
grammar_source = """
|
||||
start: NAME &&(':' | ';') | NAME
|
||||
"""
|
||||
test_source = """
|
||||
self.assertEqual(parse.parse_string("number :", mode=0), None)
|
||||
self.assertEqual(parse.parse_string("number ;", mode=0), None)
|
||||
with self.assertRaises(SyntaxError) as e:
|
||||
parse.parse_string("a", mode=0)
|
||||
self.assertIn("expected (':' | ';')", e.exception.args[0])
|
||||
"""
|
||||
self.run_test(grammar_source, test_source)
|
||||
|
@ -3,8 +3,8 @@ import unittest
|
||||
from test import test_tools
|
||||
from typing import Dict, Set
|
||||
|
||||
test_tools.skip_if_missing('peg_generator')
|
||||
with test_tools.imports_under_tool('peg_generator'):
|
||||
test_tools.skip_if_missing("peg_generator")
|
||||
with test_tools.imports_under_tool("peg_generator"):
|
||||
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
||||
from pegen.testutil import parse_string
|
||||
from pegen.first_sets import FirstSetCalculator
|
||||
@ -23,29 +23,38 @@ class TestFirstSets(unittest.TestCase):
|
||||
A: 'a' | '-'
|
||||
B: 'b' | '+'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"A": {"'a'", "'-'"},
|
||||
"B": {"'+'", "'b'"},
|
||||
"expr": {"'+'", "'a'", "'b'", "'-'"},
|
||||
"start": {"'+'", "'a'", "'b'", "'-'"},
|
||||
})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{
|
||||
"A": {"'a'", "'-'"},
|
||||
"B": {"'+'", "'b'"},
|
||||
"expr": {"'+'", "'a'", "'b'", "'-'"},
|
||||
"start": {"'+'", "'a'", "'b'", "'-'"},
|
||||
},
|
||||
)
|
||||
|
||||
def test_optionals(self) -> None:
|
||||
grammar = """
|
||||
start: expr NEWLINE
|
||||
expr: ['a'] ['b'] 'c'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"expr": {"'c'", "'a'", "'b'"},
|
||||
"start": {"'c'", "'a'", "'b'"},
|
||||
})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{
|
||||
"expr": {"'c'", "'a'", "'b'"},
|
||||
"start": {"'c'", "'a'", "'b'"},
|
||||
},
|
||||
)
|
||||
|
||||
def test_repeat_with_separator(self) -> None:
|
||||
grammar = """
|
||||
start: ','.thing+ NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{"thing": {"NUMBER"}, "start": {"NUMBER"}},
|
||||
)
|
||||
|
||||
def test_optional_operator(self) -> None:
|
||||
grammar = """
|
||||
@ -53,11 +62,14 @@ class TestFirstSets(unittest.TestCase):
|
||||
sum: (term)? 'b'
|
||||
term: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"term": {"NUMBER"},
|
||||
"sum": {"NUMBER", "'b'"},
|
||||
"start": {"'b'", "NUMBER"},
|
||||
})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{
|
||||
"term": {"NUMBER"},
|
||||
"sum": {"NUMBER", "'b'"},
|
||||
"start": {"'b'", "NUMBER"},
|
||||
},
|
||||
)
|
||||
|
||||
def test_optional_literal(self) -> None:
|
||||
grammar = """
|
||||
@ -65,60 +77,83 @@ class TestFirstSets(unittest.TestCase):
|
||||
sum: '+' ? term
|
||||
term: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"term": {"NUMBER"},
|
||||
"sum": {"'+'", "NUMBER"},
|
||||
"start": {"'+'", "NUMBER"},
|
||||
})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{
|
||||
"term": {"NUMBER"},
|
||||
"sum": {"'+'", "NUMBER"},
|
||||
"start": {"'+'", "NUMBER"},
|
||||
},
|
||||
)
|
||||
|
||||
def test_optional_after(self) -> None:
|
||||
grammar = """
|
||||
start: term NEWLINE
|
||||
term: NUMBER ['+']
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{"term": {"NUMBER"}, "start": {"NUMBER"}},
|
||||
)
|
||||
|
||||
def test_optional_before(self) -> None:
|
||||
grammar = """
|
||||
start: term NEWLINE
|
||||
term: ['+'] NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}},
|
||||
)
|
||||
|
||||
def test_repeat_0(self) -> None:
|
||||
grammar = """
|
||||
start: thing* "+" NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}},
|
||||
)
|
||||
|
||||
def test_repeat_0_with_group(self) -> None:
|
||||
grammar = """
|
||||
start: ('+' '-')* term NEWLINE
|
||||
term: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}},
|
||||
)
|
||||
|
||||
def test_repeat_1(self) -> None:
|
||||
grammar = """
|
||||
start: thing+ '-' NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{"thing": {"NUMBER"}, "start": {"NUMBER"}},
|
||||
)
|
||||
|
||||
def test_repeat_1_with_group(self) -> None:
|
||||
grammar = """
|
||||
start: ('+' term)+ term NEWLINE
|
||||
term: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}
|
||||
)
|
||||
|
||||
def test_gather(self) -> None:
|
||||
grammar = """
|
||||
start: ','.thing+ NEWLINE
|
||||
thing: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{"thing": {"NUMBER"}, "start": {"NUMBER"}},
|
||||
)
|
||||
|
||||
def test_positive_lookahead(self) -> None:
|
||||
grammar = """
|
||||
@ -126,11 +161,14 @@ class TestFirstSets(unittest.TestCase):
|
||||
expr: &'a' opt
|
||||
opt: 'a' | 'b' | 'c'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"expr": {"'a'"},
|
||||
"start": {"'a'"},
|
||||
"opt": {"'b'", "'c'", "'a'"},
|
||||
})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{
|
||||
"expr": {"'a'"},
|
||||
"start": {"'a'"},
|
||||
"opt": {"'b'", "'c'", "'a'"},
|
||||
},
|
||||
)
|
||||
|
||||
def test_negative_lookahead(self) -> None:
|
||||
grammar = """
|
||||
@ -138,11 +176,14 @@ class TestFirstSets(unittest.TestCase):
|
||||
expr: !'a' opt
|
||||
opt: 'a' | 'b' | 'c'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"opt": {"'b'", "'a'", "'c'"},
|
||||
"expr": {"'b'", "'c'"},
|
||||
"start": {"'b'", "'c'"},
|
||||
})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{
|
||||
"opt": {"'b'", "'a'", "'c'"},
|
||||
"expr": {"'b'", "'c'"},
|
||||
"start": {"'b'", "'c'"},
|
||||
},
|
||||
)
|
||||
|
||||
def test_left_recursion(self) -> None:
|
||||
grammar = """
|
||||
@ -153,21 +194,27 @@ class TestFirstSets(unittest.TestCase):
|
||||
bar: 'bar'
|
||||
baz: 'baz'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"expr": {"NUMBER", "'-'"},
|
||||
"term": {"NUMBER"},
|
||||
"start": {"NUMBER", "'-'"},
|
||||
"foo": {"'foo'"},
|
||||
"bar": {"'bar'"},
|
||||
"baz": {"'baz'"},
|
||||
})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{
|
||||
"expr": {"NUMBER", "'-'"},
|
||||
"term": {"NUMBER"},
|
||||
"start": {"NUMBER", "'-'"},
|
||||
"foo": {"'foo'"},
|
||||
"bar": {"'bar'"},
|
||||
"baz": {"'baz'"},
|
||||
},
|
||||
)
|
||||
|
||||
def test_advance_left_recursion(self) -> None:
|
||||
grammar = """
|
||||
start: NUMBER | sign start
|
||||
sign: ['-']
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}},
|
||||
)
|
||||
|
||||
def test_mutual_left_recursion(self) -> None:
|
||||
grammar = """
|
||||
@ -175,11 +222,14 @@ class TestFirstSets(unittest.TestCase):
|
||||
foo: bar 'A' | 'B'
|
||||
bar: foo 'C' | 'D'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"foo": {"'D'", "'B'"},
|
||||
"bar": {"'D'"},
|
||||
"start": {"'D'", "'B'"},
|
||||
})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{
|
||||
"foo": {"'D'", "'B'"},
|
||||
"bar": {"'D'"},
|
||||
"start": {"'D'", "'B'"},
|
||||
},
|
||||
)
|
||||
|
||||
def test_nasty_left_recursion(self) -> None:
|
||||
# TODO: Validate this
|
||||
@ -188,7 +238,10 @@ class TestFirstSets(unittest.TestCase):
|
||||
target: maybe '+' | NAME
|
||||
maybe: maybe '-' | target
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{"maybe": set(), "target": {"NAME"}, "start": {"NAME"}},
|
||||
)
|
||||
|
||||
def test_nullable_rule(self) -> None:
|
||||
grammar = """
|
||||
@ -196,17 +249,22 @@ class TestFirstSets(unittest.TestCase):
|
||||
sign: ['-']
|
||||
thing: NUMBER
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"sign": {"", "'-'"},
|
||||
"thing": {"NUMBER"},
|
||||
"start": {"NUMBER", "'-'"},
|
||||
})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{
|
||||
"sign": {"", "'-'"},
|
||||
"thing": {"NUMBER"},
|
||||
"start": {"NUMBER", "'-'"},
|
||||
},
|
||||
)
|
||||
|
||||
def test_epsilon_production_in_start_rule(self) -> None:
|
||||
grammar = """
|
||||
start: ['-'] $
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}
|
||||
)
|
||||
|
||||
def test_multiple_nullable_rules(self) -> None:
|
||||
grammar = """
|
||||
@ -216,10 +274,13 @@ class TestFirstSets(unittest.TestCase):
|
||||
other: '*'
|
||||
another: '/'
|
||||
"""
|
||||
self.assertEqual(self.calculate_first_sets(grammar), {
|
||||
"sign": {"", "'-'"},
|
||||
"thing": {"'+'", ""},
|
||||
"start": {"'+'", "'-'", "'*'"},
|
||||
"other": {"'*'"},
|
||||
"another": {"'/'"},
|
||||
})
|
||||
self.assertEqual(
|
||||
self.calculate_first_sets(grammar),
|
||||
{
|
||||
"sign": {"", "'-'"},
|
||||
"thing": {"'+'", ""},
|
||||
"start": {"'+'", "'-'", "'*'"},
|
||||
"other": {"'*'"},
|
||||
"another": {"'/'"},
|
||||
},
|
||||
)
|
||||
|
@ -1,8 +1,8 @@
|
||||
import unittest
|
||||
from test import test_tools
|
||||
|
||||
test_tools.skip_if_missing('peg_generator')
|
||||
with test_tools.imports_under_tool('peg_generator'):
|
||||
test_tools.skip_if_missing("peg_generator")
|
||||
with test_tools.imports_under_tool("peg_generator"):
|
||||
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
||||
from pegen.validator import SubRuleValidator, ValidationError
|
||||
from pegen.testutil import parse_string
|
||||
|
@ -1,3 +1,5 @@
|
||||
import ast
|
||||
import difflib
|
||||
import io
|
||||
import textwrap
|
||||
import unittest
|
||||
@ -6,14 +8,10 @@ from test import test_tools
|
||||
from typing import Dict, Any
|
||||
from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP
|
||||
|
||||
test_tools.skip_if_missing('peg_generator')
|
||||
with test_tools.imports_under_tool('peg_generator'):
|
||||
test_tools.skip_if_missing("peg_generator")
|
||||
with test_tools.imports_under_tool("peg_generator"):
|
||||
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
||||
from pegen.testutil import (
|
||||
parse_string,
|
||||
generate_parser,
|
||||
make_parser
|
||||
)
|
||||
from pegen.testutil import parse_string, generate_parser, make_parser
|
||||
from pegen.grammar import GrammarVisitor, GrammarError, Grammar
|
||||
from pegen.grammar_visualizer import ASTGrammarPrinter
|
||||
from pegen.parser import Parser
|
||||
@ -38,7 +36,9 @@ class TestPegen(unittest.TestCase):
|
||||
# Check the str() and repr() of a few rules; AST nodes don't support ==.
|
||||
self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
|
||||
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
|
||||
expected_repr = "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
|
||||
expected_repr = (
|
||||
"Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
|
||||
)
|
||||
self.assertEqual(repr(rules["term"]), expected_repr)
|
||||
|
||||
def test_long_rule_str(self) -> None:
|
||||
@ -71,7 +71,7 @@ class TestPegen(unittest.TestCase):
|
||||
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
|
||||
self.assertEqual(
|
||||
repr(rules["term"]),
|
||||
"Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
|
||||
"Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))",
|
||||
)
|
||||
|
||||
def test_gather(self) -> None:
|
||||
@ -81,24 +81,31 @@ class TestPegen(unittest.TestCase):
|
||||
"""
|
||||
rules = parse_string(grammar, GrammarParser).rules
|
||||
self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
|
||||
self.assertTrue(repr(rules["start"]).startswith(
|
||||
"Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
|
||||
))
|
||||
self.assertTrue(
|
||||
repr(rules["start"]).startswith(
|
||||
"Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
|
||||
)
|
||||
)
|
||||
self.assertEqual(str(rules["thing"]), "thing: NUMBER")
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("42\n", parser_class)
|
||||
assert node == [
|
||||
[[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
|
||||
]
|
||||
node = parse_string("1, 2\n", parser_class)
|
||||
assert node == [
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n")],
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n")],
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(
|
||||
NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"),
|
||||
]
|
||||
)
|
||||
|
||||
def test_expr_grammar(self) -> None:
|
||||
grammar = """
|
||||
@ -108,10 +115,13 @@ class TestPegen(unittest.TestCase):
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("42\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
|
||||
])
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"),
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
|
||||
],
|
||||
)
|
||||
|
||||
def test_optional_operator(self) -> None:
|
||||
grammar = """
|
||||
@ -120,22 +130,39 @@ class TestPegen(unittest.TestCase):
|
||||
term: NUMBER
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1+2\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
node = parse_string("1 + 2\n", parser_class)
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+2\n")],
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+2\n"),
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1+2\n")],
|
||||
TokenInfo(
|
||||
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
|
||||
),
|
||||
[
|
||||
TokenInfo(
|
||||
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
|
||||
),
|
||||
],
|
||||
],
|
||||
TokenInfo(
|
||||
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 3), end=(1, 4), line="1+2\n"),
|
||||
])
|
||||
)
|
||||
node = parse_string("1\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
])
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[
|
||||
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
|
||||
None,
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
],
|
||||
)
|
||||
|
||||
def test_optional_literal(self) -> None:
|
||||
grammar = """
|
||||
@ -145,18 +172,29 @@ class TestPegen(unittest.TestCase):
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1+\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")],
|
||||
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n"
|
||||
),
|
||||
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
|
||||
])
|
||||
)
|
||||
node = parse_string("1\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
])
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[
|
||||
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
|
||||
None,
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
],
|
||||
)
|
||||
|
||||
def test_alt_optional_operator(self) -> None:
|
||||
grammar = """
|
||||
@ -166,21 +204,38 @@ class TestPegen(unittest.TestCase):
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1 + 2\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n")],
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"),
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n")],
|
||||
TokenInfo(
|
||||
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
|
||||
),
|
||||
[
|
||||
TokenInfo(
|
||||
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
|
||||
),
|
||||
],
|
||||
],
|
||||
TokenInfo(
|
||||
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"),
|
||||
])
|
||||
)
|
||||
node = parse_string("1\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
])
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[
|
||||
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
|
||||
None,
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
],
|
||||
)
|
||||
|
||||
def test_repeat_0_simple(self) -> None:
|
||||
grammar = """
|
||||
@ -189,20 +244,32 @@ class TestPegen(unittest.TestCase):
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1 2 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
|
||||
[[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
|
||||
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(
|
||||
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
|
||||
])
|
||||
)
|
||||
node = parse_string("1\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")],
|
||||
[],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
])
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
|
||||
[],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
||||
],
|
||||
)
|
||||
|
||||
def test_repeat_0_complex(self) -> None:
|
||||
grammar = """
|
||||
@ -211,24 +278,43 @@ class TestPegen(unittest.TestCase):
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1 + 2 + 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
|
||||
),
|
||||
[
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
|
||||
]
|
||||
],
|
||||
[
|
||||
TokenInfo(
|
||||
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER,
|
||||
string="2",
|
||||
start=(1, 4),
|
||||
end=(1, 5),
|
||||
line="1 + 2 + 3\n",
|
||||
),
|
||||
],
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
|
||||
]
|
||||
TokenInfo(
|
||||
OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER,
|
||||
string="3",
|
||||
start=(1, 8),
|
||||
end=(1, 9),
|
||||
line="1 + 2 + 3\n",
|
||||
),
|
||||
],
|
||||
],
|
||||
TokenInfo(
|
||||
NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
|
||||
])
|
||||
)
|
||||
|
||||
def test_repeat_1_simple(self) -> None:
|
||||
grammar = """
|
||||
@ -237,14 +323,23 @@ class TestPegen(unittest.TestCase):
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1 2 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
|
||||
[[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
|
||||
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(
|
||||
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
|
||||
])
|
||||
)
|
||||
with self.assertRaises(SyntaxError):
|
||||
parse_string("1\n", parser_class)
|
||||
|
||||
@ -255,24 +350,43 @@ class TestPegen(unittest.TestCase):
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1 + 2 + 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
|
||||
),
|
||||
[
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
|
||||
]
|
||||
],
|
||||
[
|
||||
TokenInfo(
|
||||
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER,
|
||||
string="2",
|
||||
start=(1, 4),
|
||||
end=(1, 5),
|
||||
line="1 + 2 + 3\n",
|
||||
),
|
||||
],
|
||||
[
|
||||
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
|
||||
]
|
||||
TokenInfo(
|
||||
OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER,
|
||||
string="3",
|
||||
start=(1, 8),
|
||||
end=(1, 9),
|
||||
line="1 + 2 + 3\n",
|
||||
),
|
||||
],
|
||||
],
|
||||
TokenInfo(
|
||||
NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
|
||||
])
|
||||
)
|
||||
with self.assertRaises(SyntaxError):
|
||||
parse_string("1\n", parser_class)
|
||||
|
||||
@ -283,14 +397,25 @@ class TestPegen(unittest.TestCase):
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("1, 2, 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n")],
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n")],
|
||||
[TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n")],
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(
|
||||
NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"),
|
||||
])
|
||||
)
|
||||
|
||||
def test_left_recursive(self) -> None:
|
||||
grammar_source = """
|
||||
@ -311,18 +436,41 @@ class TestPegen(unittest.TestCase):
|
||||
self.assertFalse(rules["bar"].left_recursive)
|
||||
self.assertFalse(rules["baz"].left_recursive)
|
||||
node = parse_string("1 + 2 + 3\n", parser_class)
|
||||
self.assertEqual(node, [
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[
|
||||
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")]],
|
||||
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER,
|
||||
string="1",
|
||||
start=(1, 0),
|
||||
end=(1, 1),
|
||||
line="1 + 2 + 3\n",
|
||||
),
|
||||
TokenInfo(
|
||||
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER,
|
||||
string="2",
|
||||
start=(1, 4),
|
||||
end=(1, 5),
|
||||
line="1 + 2 + 3\n",
|
||||
),
|
||||
],
|
||||
TokenInfo(
|
||||
OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
|
||||
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
|
||||
TokenInfo(
|
||||
NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
|
||||
),
|
||||
],
|
||||
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
|
||||
])
|
||||
)
|
||||
|
||||
def test_python_expr(self) -> None:
|
||||
grammar = """
|
||||
@ -392,31 +540,79 @@ class TestPegen(unittest.TestCase):
|
||||
exec(out.getvalue(), ns)
|
||||
parser_class: Type[Parser] = ns["GeneratedParser"]
|
||||
node = parse_string("D A C A E", parser_class)
|
||||
self.assertEqual(node, [
|
||||
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[
|
||||
[
|
||||
[TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")],
|
||||
TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"),
|
||||
[
|
||||
TokenInfo(
|
||||
type=NAME,
|
||||
string="D",
|
||||
start=(1, 0),
|
||||
end=(1, 1),
|
||||
line="D A C A E",
|
||||
),
|
||||
TokenInfo(
|
||||
type=NAME,
|
||||
string="A",
|
||||
start=(1, 2),
|
||||
end=(1, 3),
|
||||
line="D A C A E",
|
||||
),
|
||||
],
|
||||
TokenInfo(
|
||||
type=NAME,
|
||||
string="C",
|
||||
start=(1, 4),
|
||||
end=(1, 5),
|
||||
line="D A C A E",
|
||||
),
|
||||
],
|
||||
TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"),
|
||||
TokenInfo(
|
||||
type=NAME,
|
||||
string="A",
|
||||
start=(1, 6),
|
||||
end=(1, 7),
|
||||
line="D A C A E",
|
||||
),
|
||||
],
|
||||
TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"),
|
||||
TokenInfo(
|
||||
type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"
|
||||
),
|
||||
],
|
||||
TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"),
|
||||
])
|
||||
)
|
||||
node = parse_string("B C A E", parser_class)
|
||||
self.assertIsNotNone(node)
|
||||
self.assertEqual(node, [
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
[
|
||||
[TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")],
|
||||
TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"),
|
||||
[
|
||||
TokenInfo(
|
||||
type=NAME,
|
||||
string="B",
|
||||
start=(1, 0),
|
||||
end=(1, 1),
|
||||
line="B C A E",
|
||||
),
|
||||
TokenInfo(
|
||||
type=NAME,
|
||||
string="C",
|
||||
start=(1, 2),
|
||||
end=(1, 3),
|
||||
line="B C A E",
|
||||
),
|
||||
],
|
||||
TokenInfo(
|
||||
type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"
|
||||
),
|
||||
],
|
||||
TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"),
|
||||
TokenInfo(
|
||||
type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"
|
||||
),
|
||||
],
|
||||
TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"),
|
||||
])
|
||||
)
|
||||
|
||||
def test_nasty_mutually_left_recursive(self) -> None:
|
||||
# This grammar does not recognize 'x - + =', much to my chagrin.
|
||||
@ -454,43 +650,44 @@ class TestPegen(unittest.TestCase):
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("foo = 12 + 12 .", parser_class)
|
||||
self.assertEqual(node, [
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
TokenInfo(
|
||||
NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ."
|
||||
),
|
||||
TokenInfo(
|
||||
OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."
|
||||
),
|
||||
[
|
||||
[TokenInfo(NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .")],
|
||||
TokenInfo(OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."),
|
||||
TokenInfo(
|
||||
NUMBER,
|
||||
string="12",
|
||||
start=(1, 6),
|
||||
end=(1, 8),
|
||||
line="foo = 12 + 12 .",
|
||||
),
|
||||
[
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER, string="12", start=(1, 6), end=(1, 8), line="foo = 12 + 12 ."
|
||||
)
|
||||
],
|
||||
[
|
||||
[
|
||||
[
|
||||
TokenInfo(
|
||||
OP,
|
||||
string="+",
|
||||
start=(1, 9),
|
||||
end=(1, 10),
|
||||
line="foo = 12 + 12 .",
|
||||
),
|
||||
[
|
||||
TokenInfo(
|
||||
NUMBER,
|
||||
string="12",
|
||||
start=(1, 11),
|
||||
end=(1, 13),
|
||||
line="foo = 12 + 12 .",
|
||||
)
|
||||
],
|
||||
]
|
||||
]
|
||||
],
|
||||
OP,
|
||||
string="+",
|
||||
start=(1, 9),
|
||||
end=(1, 10),
|
||||
line="foo = 12 + 12 .",
|
||||
),
|
||||
TokenInfo(
|
||||
NUMBER,
|
||||
string="12",
|
||||
start=(1, 11),
|
||||
end=(1, 13),
|
||||
line="foo = 12 + 12 .",
|
||||
),
|
||||
]
|
||||
],
|
||||
]
|
||||
]
|
||||
])
|
||||
],
|
||||
],
|
||||
)
|
||||
|
||||
def test_named_lookahead_error(self) -> None:
|
||||
grammar = """
|
||||
@ -533,11 +730,14 @@ class TestPegen(unittest.TestCase):
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
node = parse_string("(1)", parser_class)
|
||||
self.assertEqual(node, [
|
||||
TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
|
||||
[TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)")],
|
||||
TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
|
||||
])
|
||||
self.assertEqual(
|
||||
node,
|
||||
[
|
||||
TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
|
||||
TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"),
|
||||
TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
|
||||
],
|
||||
)
|
||||
|
||||
def test_dangling_reference(self) -> None:
|
||||
grammar = """
|
||||
@ -589,6 +789,124 @@ class TestPegen(unittest.TestCase):
|
||||
with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"):
|
||||
parser_class = make_parser(grammar)
|
||||
|
||||
def test_soft_keyword(self) -> None:
|
||||
grammar = """
|
||||
start:
|
||||
| "number" n=NUMBER { eval(n.string) }
|
||||
| "string" n=STRING { n.string }
|
||||
| SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { f"{l.string} = {n.string}"}
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
self.assertEqual(parse_string("number 1", parser_class, verbose=True), 1)
|
||||
self.assertEqual(parse_string("string 'b'", parser_class, verbose=True), "'b'")
|
||||
self.assertEqual(
|
||||
parse_string("number test 1", parser_class, verbose=True), "test = 1"
|
||||
)
|
||||
assert (
|
||||
parse_string("string test 'b'", parser_class, verbose=True) == "test = 'b'"
|
||||
)
|
||||
with self.assertRaises(SyntaxError):
|
||||
parse_string("test 1", parser_class, verbose=True)
|
||||
|
||||
def test_forced(self) -> None:
|
||||
grammar = """
|
||||
start: NAME &&':' | NAME
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
self.assertTrue(parse_string("number :", parser_class, verbose=True))
|
||||
with self.assertRaises(SyntaxError) as e:
|
||||
parse_string("a", parser_class, verbose=True)
|
||||
|
||||
self.assertIn("expected ':'", str(e.exception))
|
||||
|
||||
def test_forced_with_group(self) -> None:
|
||||
grammar = """
|
||||
start: NAME &&(':' | ';') | NAME
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
self.assertTrue(parse_string("number :", parser_class, verbose=True))
|
||||
self.assertTrue(parse_string("number ;", parser_class, verbose=True))
|
||||
with self.assertRaises(SyntaxError) as e:
|
||||
parse_string("a", parser_class, verbose=True)
|
||||
self.assertIn("expected (':' | ';')", e.exception.args[0])
|
||||
|
||||
def test_unreachable_explicit(self) -> None:
|
||||
source = """
|
||||
start: NAME { UNREACHABLE }
|
||||
"""
|
||||
grammar = parse_string(source, GrammarParser)
|
||||
out = io.StringIO()
|
||||
genr = PythonParserGenerator(
|
||||
grammar, out, unreachable_formatting="This is a test"
|
||||
)
|
||||
genr.generate("<string>")
|
||||
self.assertIn("This is a test", out.getvalue())
|
||||
|
||||
def test_unreachable_implicit1(self) -> None:
|
||||
source = """
|
||||
start: NAME | invalid_input
|
||||
invalid_input: NUMBER { None }
|
||||
"""
|
||||
grammar = parse_string(source, GrammarParser)
|
||||
out = io.StringIO()
|
||||
genr = PythonParserGenerator(
|
||||
grammar, out, unreachable_formatting="This is a test"
|
||||
)
|
||||
genr.generate("<string>")
|
||||
self.assertIn("This is a test", out.getvalue())
|
||||
|
||||
def test_unreachable_implicit2(self) -> None:
|
||||
source = """
|
||||
start: NAME | '(' invalid_input ')'
|
||||
invalid_input: NUMBER { None }
|
||||
"""
|
||||
grammar = parse_string(source, GrammarParser)
|
||||
out = io.StringIO()
|
||||
genr = PythonParserGenerator(
|
||||
grammar, out, unreachable_formatting="This is a test"
|
||||
)
|
||||
genr.generate("<string>")
|
||||
self.assertIn("This is a test", out.getvalue())
|
||||
|
||||
def test_unreachable_implicit3(self) -> None:
|
||||
source = """
|
||||
start: NAME | invalid_input { None }
|
||||
invalid_input: NUMBER
|
||||
"""
|
||||
grammar = parse_string(source, GrammarParser)
|
||||
out = io.StringIO()
|
||||
genr = PythonParserGenerator(
|
||||
grammar, out, unreachable_formatting="This is a test"
|
||||
)
|
||||
genr.generate("<string>")
|
||||
self.assertNotIn("This is a test", out.getvalue())
|
||||
|
||||
def test_locations_in_alt_action_and_group(self) -> None:
|
||||
grammar = """
|
||||
start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) }
|
||||
term:
|
||||
| l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) }
|
||||
| l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) }
|
||||
| factor
|
||||
factor:
|
||||
| (
|
||||
n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) } |
|
||||
n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) }
|
||||
)
|
||||
"""
|
||||
parser_class = make_parser(grammar)
|
||||
source = "2*3\n"
|
||||
o = ast.dump(parse_string(source, parser_class).body, include_attributes=True)
|
||||
p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace(
|
||||
" kind=None,", ""
|
||||
)
|
||||
diff = "\n".join(
|
||||
difflib.unified_diff(
|
||||
o.split("\n"), p.split("\n"), "cpython", "python-pegen"
|
||||
)
|
||||
)
|
||||
self.assertFalse(diff)
|
||||
|
||||
|
||||
class TestGrammarVisitor:
|
||||
class Visitor(GrammarVisitor):
|
||||
|
@ -1176,7 +1176,7 @@ statements_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ statements[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "statement+"));
|
||||
_res = ( asdl_stmt_seq * ) _PyPegen_seq_flatten ( p , a );
|
||||
_res = ( asdl_stmt_seq* ) _PyPegen_seq_flatten ( p , a );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -1217,7 +1217,7 @@ statement_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ statement[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "compound_stmt"));
|
||||
_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
|
||||
_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -1294,7 +1294,7 @@ statement_newline_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ statement_newline[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "compound_stmt NEWLINE"));
|
||||
_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
|
||||
_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -1346,7 +1346,7 @@ statement_newline_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , CHECK ( stmt_ty , _PyAST_Pass ( EXTRA ) ) );
|
||||
_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , CHECK ( stmt_ty , _PyAST_Pass ( EXTRA ) ) );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -1416,7 +1416,7 @@ simple_stmts_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ simple_stmts[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "simple_stmt !';' NEWLINE"));
|
||||
_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
|
||||
_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -2403,7 +2403,7 @@ augassign_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ augassign[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'@='"));
|
||||
_res = CHECK_VERSION ( AugOperator * , 5 , "The '@' operator is" , _PyPegen_augoperator ( p , MatMult ) );
|
||||
_res = CHECK_VERSION ( AugOperator* , 5 , "The '@' operator is" , _PyPegen_augoperator ( p , MatMult ) );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -2841,7 +2841,7 @@ global_stmt_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Global ( CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
|
||||
_res = _PyAST_Global ( CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -2903,7 +2903,7 @@ nonlocal_stmt_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Nonlocal ( CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
|
||||
_res = _PyAST_Nonlocal ( CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -3460,7 +3460,7 @@ import_from_targets_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = ( asdl_alias_seq * ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p , EXTRA ) ) );
|
||||
_res = ( asdl_alias_seq* ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p , EXTRA ) ) );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -4649,7 +4649,7 @@ slash_with_default_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default* param_with_default+ '/' ','"));
|
||||
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
|
||||
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -4681,7 +4681,7 @@ slash_with_default_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default* param_with_default+ '/' &')'"));
|
||||
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
|
||||
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -5340,7 +5340,7 @@ if_stmt_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq * , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
|
||||
_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq* , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -5478,7 +5478,7 @@ elif_stmt_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq * , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
|
||||
_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq* , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -6756,7 +6756,7 @@ subject_expr_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , value , values ) ) , Load , EXTRA );
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , value , values ) ) , Load , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -9049,7 +9049,7 @@ mapping_pattern_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , items ) ) , rest -> v . Name . id , EXTRA );
|
||||
_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , items ) ) , rest -> v . Name . id , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -9092,7 +9092,7 @@ mapping_pattern_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , items ) ) , NULL , EXTRA );
|
||||
_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , items ) ) , NULL , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -9381,7 +9381,7 @@ class_pattern_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_MatchClass ( cls , NULL , CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
|
||||
_res = _PyAST_MatchClass ( cls , NULL , CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -9433,7 +9433,7 @@ class_pattern_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_MatchClass ( cls , patterns , CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
|
||||
_res = _PyAST_MatchClass ( cls , patterns , CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -9642,7 +9642,7 @@ expressions_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -9678,7 +9678,7 @@ expressions_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -10004,7 +10004,7 @@ star_expressions_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -10040,7 +10040,7 @@ star_expressions_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -10485,7 +10485,7 @@ disjunction_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_BoolOp ( Or , CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
|
||||
_res = _PyAST_BoolOp ( Or , CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -10571,7 +10571,7 @@ conjunction_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_BoolOp ( And , CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
|
||||
_res = _PyAST_BoolOp ( And , CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -10739,7 +10739,7 @@ comparison_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Compare ( a , CHECK ( asdl_int_seq * , _PyPegen_get_cmpops ( p , b ) ) , CHECK ( asdl_expr_seq * , _PyPegen_get_exprs ( p , b ) ) , EXTRA );
|
||||
_res = _PyAST_Compare ( a , CHECK ( asdl_int_seq* , _PyPegen_get_cmpops ( p , b ) ) , CHECK ( asdl_expr_seq* , _PyPegen_get_exprs ( p , b ) ) , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -12837,7 +12837,7 @@ primary_raw(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Call ( a , CHECK ( asdl_expr_seq * , ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
|
||||
_res = _PyAST_Call ( a , CHECK ( asdl_expr_seq* , ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -13896,7 +13896,7 @@ lambda_slash_with_default_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ lambda_slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default* lambda_param_with_default+ '/' ','"));
|
||||
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
|
||||
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -13928,7 +13928,7 @@ lambda_slash_with_default_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ lambda_slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default* lambda_param_with_default+ '/' &':'"));
|
||||
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
|
||||
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -14689,7 +14689,7 @@ dict_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Dict ( CHECK ( asdl_expr_seq * , _PyPegen_get_keys ( p , a ) ) , CHECK ( asdl_expr_seq * , _PyPegen_get_values ( p , a ) ) , EXTRA );
|
||||
_res = _PyAST_Dict ( CHECK ( asdl_expr_seq* , _PyPegen_get_keys ( p , a ) ) , CHECK ( asdl_expr_seq* , _PyPegen_get_values ( p , a ) ) , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -15556,7 +15556,7 @@ args_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Call ( _PyPegen_dummy_name ( p ) , CHECK_NULL_ALLOWED ( asdl_expr_seq * , _PyPegen_seq_extract_starred_exprs ( p , a ) ) , CHECK_NULL_ALLOWED ( asdl_keyword_seq * , _PyPegen_seq_delete_starred_exprs ( p , a ) ) , EXTRA );
|
||||
_res = _PyAST_Call ( _PyPegen_dummy_name ( p ) , CHECK_NULL_ALLOWED ( asdl_expr_seq* , _PyPegen_seq_extract_starred_exprs ( p , a ) ) , CHECK_NULL_ALLOWED ( asdl_keyword_seq* , _PyPegen_seq_delete_starred_exprs ( p , a ) ) , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -16026,7 +16026,7 @@ star_targets_rule(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Store , EXTRA );
|
||||
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Store , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -16119,7 +16119,7 @@ star_targets_tuple_seq_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ((',' star_target))+ ','?"));
|
||||
_res = ( asdl_expr_seq * ) _PyPegen_seq_insert_in_front ( p , a , b );
|
||||
_res = ( asdl_expr_seq* ) _PyPegen_seq_insert_in_front ( p , a , b );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -16146,7 +16146,7 @@ star_targets_tuple_seq_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ','"));
|
||||
_res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a );
|
||||
_res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -16923,7 +16923,7 @@ t_primary_raw(Parser *p)
|
||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||
int _end_col_offset = _token->end_col_offset;
|
||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||
_res = _PyAST_Call ( a , CHECK ( asdl_expr_seq * , ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
|
||||
_res = _PyAST_Call ( a , CHECK ( asdl_expr_seq* , ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -17474,7 +17474,7 @@ type_expressions_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression ',' '**' expression"));
|
||||
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq * , _PyPegen_seq_append_to_end ( p , a , b ) ) , c );
|
||||
_res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq* , _PyPegen_seq_append_to_end ( p , a , b ) ) , c );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -17507,7 +17507,7 @@ type_expressions_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression"));
|
||||
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , a , b );
|
||||
_res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , a , b );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -17540,7 +17540,7 @@ type_expressions_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '**' expression"));
|
||||
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , a , b );
|
||||
_res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , a , b );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -17576,7 +17576,7 @@ type_expressions_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression ',' '**' expression"));
|
||||
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq * , _PyPegen_singleton_seq ( p , a ) ) , b );
|
||||
_res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq* , _PyPegen_singleton_seq ( p , a ) ) , b );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -17603,7 +17603,7 @@ type_expressions_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression"));
|
||||
_res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a );
|
||||
_res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -17630,7 +17630,7 @@ type_expressions_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**' expression"));
|
||||
_res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a );
|
||||
_res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
@ -20149,7 +20149,7 @@ invalid_match_stmt_rule(Parser *p)
|
||||
)
|
||||
{
|
||||
D(fprintf(stderr, "%*c+ invalid_match_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "\"match\" subject_expr !':'"));
|
||||
_res = CHECK_VERSION ( void * , 10 , "Pattern matching is" , RAISE_SYNTAX_ERROR ( "expected ':'" ) );
|
||||
_res = CHECK_VERSION ( void* , 10 , "Pattern matching is" , RAISE_SYNTAX_ERROR ( "expected ':'" ) );
|
||||
if (_res == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
D(p->level--);
|
||||
|
@ -897,6 +897,19 @@ _PyPegen_expect_token(Parser *p, int type)
|
||||
return t;
|
||||
}
|
||||
|
||||
void*
|
||||
_PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) {
|
||||
|
||||
if (p->error_indicator == 1) {
|
||||
return NULL;
|
||||
}
|
||||
if (result == NULL) {
|
||||
RAISE_SYNTAX_ERROR("expected (%s)", expected);
|
||||
return NULL;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
Token *
|
||||
_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {
|
||||
|
||||
|
@ -130,6 +130,7 @@ int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*),
|
||||
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
|
||||
|
||||
Token *_PyPegen_expect_token(Parser *p, int type);
|
||||
void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected);
|
||||
Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
|
||||
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
|
||||
expr_ty _PyPegen_soft_keyword_token(Parser *p);
|
||||
|
@ -1,5 +1,5 @@
|
||||
[mypy]
|
||||
files = pegen, scripts
|
||||
files = pegen
|
||||
|
||||
follow_imports = error
|
||||
no_implicit_optional = True
|
||||
|
@ -100,7 +100,9 @@ c_parser.add_argument(
|
||||
"--optimized", action="store_true", help="Compile the extension in optimized mode"
|
||||
)
|
||||
c_parser.add_argument(
|
||||
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
|
||||
"--skip-actions",
|
||||
action="store_true",
|
||||
help="Suppress code emission for rule actions",
|
||||
)
|
||||
|
||||
python_parser = subparsers.add_parser("python", help="Generate Python code")
|
||||
@ -114,7 +116,9 @@ python_parser.add_argument(
|
||||
help="Where to write the generated parser",
|
||||
)
|
||||
python_parser.add_argument(
|
||||
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
|
||||
"--skip-actions",
|
||||
action="store_true",
|
||||
help="Suppress code emission for rule actions",
|
||||
)
|
||||
|
||||
|
||||
|
@ -6,9 +6,17 @@ always fail. We rely on string comparison of the base classes instead.
|
||||
TODO: Remove the above-described hack.
|
||||
"""
|
||||
|
||||
from typing import Any, Optional, Tuple
|
||||
|
||||
def ast_dump(node, annotate_fields=True, include_attributes=False, *, indent=None):
|
||||
def _format(node, level=0):
|
||||
|
||||
def ast_dump(
|
||||
node: Any,
|
||||
annotate_fields: bool = True,
|
||||
include_attributes: bool = False,
|
||||
*,
|
||||
indent: Optional[str] = None,
|
||||
) -> str:
|
||||
def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
|
||||
if indent is not None:
|
||||
level += 1
|
||||
prefix = "\n" + indent * level
|
||||
|
@ -58,7 +58,7 @@ def compile_c_extension(
|
||||
extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
|
||||
extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
|
||||
# Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
|
||||
extra_compile_args.append('-D_Py_TEST_PEGEN')
|
||||
extra_compile_args.append("-D_Py_TEST_PEGEN")
|
||||
extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
|
||||
if keep_asserts:
|
||||
extra_compile_args.append("-UNDEBUG")
|
||||
@ -175,7 +175,10 @@ def build_c_generator(
|
||||
|
||||
|
||||
def build_python_generator(
|
||||
grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False,
|
||||
grammar: Grammar,
|
||||
grammar_file: str,
|
||||
output_file: str,
|
||||
skip_actions: bool = False,
|
||||
) -> ParserGenerator:
|
||||
with open(output_file, "w") as file:
|
||||
gen: ParserGenerator = PythonParserGenerator(grammar, file) # TODO: skip_actions
|
||||
@ -246,5 +249,10 @@ def build_python_parser_and_generator(
|
||||
skip_actions (bool, optional): Whether to pretend no rule has any actions.
|
||||
"""
|
||||
grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
|
||||
gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions,)
|
||||
gen = build_python_generator(
|
||||
grammar,
|
||||
grammar_file,
|
||||
output_file,
|
||||
skip_actions=skip_actions,
|
||||
)
|
||||
return grammar, parser, tokenizer, gen
|
||||
|
@ -12,6 +12,7 @@ from pegen.grammar import (
|
||||
Gather,
|
||||
GrammarVisitor,
|
||||
Group,
|
||||
Leaf,
|
||||
Lookahead,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
@ -91,7 +92,16 @@ class FunctionCall:
|
||||
parts.append(", 1")
|
||||
if self.assigned_variable:
|
||||
if self.assigned_variable_type:
|
||||
parts = ["(", self.assigned_variable, " = ", '(', self.assigned_variable_type, ')', *parts, ")"]
|
||||
parts = [
|
||||
"(",
|
||||
self.assigned_variable,
|
||||
" = ",
|
||||
"(",
|
||||
self.assigned_variable_type,
|
||||
")",
|
||||
*parts,
|
||||
")",
|
||||
]
|
||||
else:
|
||||
parts = ["(", self.assigned_variable, " = ", *parts, ")"]
|
||||
if self.comment:
|
||||
@ -256,9 +266,10 @@ class CCallMakerVisitor(GrammarVisitor):
|
||||
|
||||
def visit_Forced(self, node: Forced) -> FunctionCall:
|
||||
call = self.generate_call(node.node)
|
||||
if call.nodetype == NodeTypes.GENERIC_TOKEN:
|
||||
if isinstance(node.node, Leaf):
|
||||
assert isinstance(node.node, Leaf)
|
||||
val = ast.literal_eval(node.node.value)
|
||||
assert val in self.exact_tokens, f"{node.value} is not a known literal"
|
||||
assert val in self.exact_tokens, f"{node.node.value} is not a known literal"
|
||||
type = self.exact_tokens[val]
|
||||
return FunctionCall(
|
||||
assigned_variable="_literal",
|
||||
@ -268,9 +279,19 @@ class CCallMakerVisitor(GrammarVisitor):
|
||||
return_type="Token *",
|
||||
comment=f"forced_token='{val}'",
|
||||
)
|
||||
if isinstance(node.node, Group):
|
||||
call = self.visit(node.node.rhs)
|
||||
call.assigned_variable = None
|
||||
call.comment = None
|
||||
return FunctionCall(
|
||||
assigned_variable="_literal",
|
||||
function=f"_PyPegen_expect_forced_result",
|
||||
arguments=["p", str(call), f'"{node.node.rhs!s}"'],
|
||||
return_type="void *",
|
||||
comment=f"forced_token=({node.node.rhs!s})",
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
f"Forced tokens don't work with {call.nodetype} tokens")
|
||||
raise NotImplementedError(f"Forced tokens don't work with {node.node} nodes")
|
||||
|
||||
def visit_Opt(self, node: Opt) -> FunctionCall:
|
||||
call = self.generate_call(node.node)
|
||||
@ -347,7 +368,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
debug: bool = False,
|
||||
skip_actions: bool = False,
|
||||
):
|
||||
super().__init__(grammar, tokens, file)
|
||||
super().__init__(grammar, set(tokens.values()), file)
|
||||
self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
|
||||
self, exact_tokens, non_exact_tokens
|
||||
)
|
||||
@ -386,7 +407,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
self.print(f"goto {goto_target};")
|
||||
self.print(f"}}")
|
||||
|
||||
def out_of_memory_return(self, expr: str, cleanup_code: Optional[str] = None,) -> None:
|
||||
def out_of_memory_return(
|
||||
self,
|
||||
expr: str,
|
||||
cleanup_code: Optional[str] = None,
|
||||
) -> None:
|
||||
self.print(f"if ({expr}) {{")
|
||||
with self.indent():
|
||||
if cleanup_code is not None:
|
||||
@ -568,7 +593,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
|
||||
self._set_up_token_start_metadata_extraction()
|
||||
self.visit(
|
||||
rhs, is_loop=False, is_gather=node.is_gather(), rulename=node.name,
|
||||
rhs,
|
||||
is_loop=False,
|
||||
is_gather=node.is_gather(),
|
||||
rulename=node.name,
|
||||
)
|
||||
if self.debug:
|
||||
self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));')
|
||||
@ -601,7 +629,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
|
||||
self._set_up_token_start_metadata_extraction()
|
||||
self.visit(
|
||||
rhs, is_loop=True, is_gather=node.is_gather(), rulename=node.name,
|
||||
rhs,
|
||||
is_loop=True,
|
||||
is_gather=node.is_gather(),
|
||||
rulename=node.name,
|
||||
)
|
||||
if is_repeat1:
|
||||
self.print("if (_n == 0 || p->error_indicator) {")
|
||||
@ -771,7 +802,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
def visit_Alt(
|
||||
self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
|
||||
) -> None:
|
||||
if len(node.items) == 1 and str(node.items[0]).startswith('invalid_'):
|
||||
if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
|
||||
self.print(f"if (p->call_invalid_rules) {{ // {node}")
|
||||
else:
|
||||
self.print(f"{{ // {node}")
|
||||
@ -791,7 +822,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
if v == "_cut_var":
|
||||
v += " = 0" # cut_var must be initialized
|
||||
self.print(f"{var_type}{v};")
|
||||
if v.startswith("_opt_var"):
|
||||
if v and v.startswith("_opt_var"):
|
||||
self.print(f"UNUSED({v}); // Silence compiler warnings")
|
||||
|
||||
with self.local_variable_context():
|
||||
|
@ -29,7 +29,8 @@ from pegen.grammar import (
|
||||
)
|
||||
|
||||
argparser = argparse.ArgumentParser(
|
||||
prog="calculate_first_sets", description="Calculate the first sets of a grammar",
|
||||
prog="calculate_first_sets",
|
||||
description="Calculate the first sets of a grammar",
|
||||
)
|
||||
argparser.add_argument("grammar_file", help="The grammar file")
|
||||
|
||||
|
@ -2,7 +2,10 @@
|
||||
# @generated by pegen from ./Tools/peg_generator/pegen/metagrammar.gram
|
||||
|
||||
import ast
|
||||
from typing import Optional, Any
|
||||
import sys
|
||||
import tokenize
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
from pegen.parser import memoize, memoize_left_rec, logger, Parser
|
||||
from ast import literal_eval
|
||||
@ -35,83 +38,71 @@ from pegen.grammar import (
|
||||
StringLeaf,
|
||||
)
|
||||
|
||||
# Keywords and soft keywords are listed at the end of the parser definition.
|
||||
class GeneratedParser(Parser):
|
||||
|
||||
@memoize
|
||||
def start(self) -> Optional[Grammar]:
|
||||
# start: grammar $
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
mark = self._mark()
|
||||
if (
|
||||
(grammar := self.grammar())
|
||||
and
|
||||
(endmarker := self.expect('ENDMARKER'))
|
||||
(_endmarker := self.expect('ENDMARKER'))
|
||||
):
|
||||
return grammar
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def grammar(self) -> Optional[Grammar]:
|
||||
# grammar: metas rules | rules
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
mark = self._mark()
|
||||
if (
|
||||
(metas := self.metas())
|
||||
and
|
||||
(rules := self.rules())
|
||||
):
|
||||
return Grammar ( rules , metas )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(rules := self.rules())
|
||||
):
|
||||
return Grammar ( rules , [ ] )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return Grammar ( rules , [] )
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def metas(self) -> Optional[MetaList]:
|
||||
# metas: meta metas | meta
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
mark = self._mark()
|
||||
if (
|
||||
(meta := self.meta())
|
||||
and
|
||||
(metas := self.metas())
|
||||
):
|
||||
return [ meta ] + metas
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
return [meta] + metas
|
||||
self._reset(mark)
|
||||
if (
|
||||
(meta := self.meta())
|
||||
):
|
||||
return [ meta ]
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return [meta]
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def meta(self) -> Optional[MetaTuple]:
|
||||
# meta: "@" NAME NEWLINE | "@" NAME NAME NEWLINE | "@" NAME STRING NEWLINE
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
mark = self._mark()
|
||||
if (
|
||||
(literal := self.expect("@"))
|
||||
and
|
||||
(name := self.name())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
(_newline := self.expect('NEWLINE'))
|
||||
):
|
||||
return ( name . string , None )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(literal := self.expect("@"))
|
||||
and
|
||||
@ -119,12 +110,10 @@ class GeneratedParser(Parser):
|
||||
and
|
||||
(b := self.name())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
(_newline := self.expect('NEWLINE'))
|
||||
):
|
||||
return ( a . string , b . string )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(literal := self.expect("@"))
|
||||
and
|
||||
@ -132,40 +121,34 @@ class GeneratedParser(Parser):
|
||||
and
|
||||
(string := self.string())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
(_newline := self.expect('NEWLINE'))
|
||||
):
|
||||
return ( name . string , literal_eval ( string . string ) )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def rules(self) -> Optional[RuleList]:
|
||||
# rules: rule rules | rule
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
mark = self._mark()
|
||||
if (
|
||||
(rule := self.rule())
|
||||
and
|
||||
(rules := self.rules())
|
||||
):
|
||||
return [ rule ] + rules
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
return [rule] + rules
|
||||
self._reset(mark)
|
||||
if (
|
||||
(rule := self.rule())
|
||||
):
|
||||
return [ rule ]
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return [rule]
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def rule(self) -> Optional[Rule]:
|
||||
# rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
mark = self._mark()
|
||||
if (
|
||||
(rulename := self.rulename())
|
||||
and
|
||||
@ -175,18 +158,16 @@ class GeneratedParser(Parser):
|
||||
and
|
||||
(alts := self.alts())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
(_newline := self.expect('NEWLINE'))
|
||||
and
|
||||
(indent := self.expect('INDENT'))
|
||||
(_indent := self.expect('INDENT'))
|
||||
and
|
||||
(more_alts := self.more_alts())
|
||||
and
|
||||
(dedent := self.expect('DEDENT'))
|
||||
(_dedent := self.expect('DEDENT'))
|
||||
):
|
||||
return Rule ( rulename [ 0 ] , rulename [ 1 ] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
|
||||
self._reset(mark)
|
||||
if (
|
||||
(rulename := self.rulename())
|
||||
and
|
||||
@ -194,18 +175,16 @@ class GeneratedParser(Parser):
|
||||
and
|
||||
(literal := self.expect(":"))
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
(_newline := self.expect('NEWLINE'))
|
||||
and
|
||||
(indent := self.expect('INDENT'))
|
||||
(_indent := self.expect('INDENT'))
|
||||
and
|
||||
(more_alts := self.more_alts())
|
||||
and
|
||||
(dedent := self.expect('DEDENT'))
|
||||
(_dedent := self.expect('DEDENT'))
|
||||
):
|
||||
return Rule ( rulename [ 0 ] , rulename [ 1 ] , more_alts , memo = opt )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
return Rule ( rulename [0] , rulename [1] , more_alts , memo = opt )
|
||||
self._reset(mark)
|
||||
if (
|
||||
(rulename := self.rulename())
|
||||
and
|
||||
@ -215,76 +194,49 @@ class GeneratedParser(Parser):
|
||||
and
|
||||
(alts := self.alts())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
(_newline := self.expect('NEWLINE'))
|
||||
):
|
||||
return Rule ( rulename [ 0 ] , rulename [ 1 ] , alts , memo = opt )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return Rule ( rulename [0] , rulename [1] , alts , memo = opt )
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def rulename(self) -> Optional[RuleName]:
|
||||
# rulename: NAME '[' NAME '*' ']' | NAME '[' NAME ']' | NAME
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
# rulename: NAME annotation | NAME
|
||||
mark = self._mark()
|
||||
if (
|
||||
(name := self.name())
|
||||
and
|
||||
(literal := self.expect('['))
|
||||
and
|
||||
(type := self.name())
|
||||
and
|
||||
(literal_1 := self.expect('*'))
|
||||
and
|
||||
(literal_2 := self.expect(']'))
|
||||
(annotation := self.annotation())
|
||||
):
|
||||
return ( name . string , type . string + "*" )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(name := self.name())
|
||||
and
|
||||
(literal := self.expect('['))
|
||||
and
|
||||
(type := self.name())
|
||||
and
|
||||
(literal_1 := self.expect(']'))
|
||||
):
|
||||
return ( name . string , type . string )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
return ( name . string , annotation )
|
||||
self._reset(mark)
|
||||
if (
|
||||
(name := self.name())
|
||||
):
|
||||
return ( name . string , None )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def memoflag(self) -> Optional[str]:
|
||||
# memoflag: '(' 'memo' ')'
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
# memoflag: '(' "memo" ')'
|
||||
mark = self._mark()
|
||||
if (
|
||||
(literal := self.expect('('))
|
||||
and
|
||||
(literal_1 := self.expect('memo'))
|
||||
(literal_1 := self.expect("memo"))
|
||||
and
|
||||
(literal_2 := self.expect(')'))
|
||||
):
|
||||
return "memo"
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def alts(self) -> Optional[Rhs]:
|
||||
# alts: alt "|" alts | alt
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
mark = self._mark()
|
||||
if (
|
||||
(alt := self.alt())
|
||||
and
|
||||
@ -292,53 +244,45 @@ class GeneratedParser(Parser):
|
||||
and
|
||||
(alts := self.alts())
|
||||
):
|
||||
return Rhs ( [ alt ] + alts . alts )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
return Rhs ( [alt] + alts . alts )
|
||||
self._reset(mark)
|
||||
if (
|
||||
(alt := self.alt())
|
||||
):
|
||||
return Rhs ( [ alt ] )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return Rhs ( [alt] )
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def more_alts(self) -> Optional[Rhs]:
|
||||
# more_alts: "|" alts NEWLINE more_alts | "|" alts NEWLINE
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
mark = self._mark()
|
||||
if (
|
||||
(literal := self.expect("|"))
|
||||
and
|
||||
(alts := self.alts())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
(_newline := self.expect('NEWLINE'))
|
||||
and
|
||||
(more_alts := self.more_alts())
|
||||
):
|
||||
return Rhs ( alts . alts + more_alts . alts )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(literal := self.expect("|"))
|
||||
and
|
||||
(alts := self.alts())
|
||||
and
|
||||
(newline := self.expect('NEWLINE'))
|
||||
(_newline := self.expect('NEWLINE'))
|
||||
):
|
||||
return Rhs ( alts . alts )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def alt(self) -> Optional[Alt]:
|
||||
# alt: items '$' action | items '$' | items action | items
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
mark = self._mark()
|
||||
if (
|
||||
(items := self.items())
|
||||
and
|
||||
@ -346,101 +290,65 @@ class GeneratedParser(Parser):
|
||||
and
|
||||
(action := self.action())
|
||||
):
|
||||
return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = action )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
return Alt ( items + [NamedItem ( None , NameLeaf ( 'ENDMARKER' ) )] , action = action )
|
||||
self._reset(mark)
|
||||
if (
|
||||
(items := self.items())
|
||||
and
|
||||
(literal := self.expect('$'))
|
||||
):
|
||||
return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = None )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
return Alt ( items + [NamedItem ( None , NameLeaf ( 'ENDMARKER' ) )] , action = None )
|
||||
self._reset(mark)
|
||||
if (
|
||||
(items := self.items())
|
||||
and
|
||||
(action := self.action())
|
||||
):
|
||||
return Alt ( items , action = action )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(items := self.items())
|
||||
):
|
||||
return Alt ( items , action = None )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def items(self) -> Optional[NamedItemList]:
|
||||
# items: named_item items | named_item
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
mark = self._mark()
|
||||
if (
|
||||
(named_item := self.named_item())
|
||||
and
|
||||
(items := self.items())
|
||||
):
|
||||
return [ named_item ] + items
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
return [named_item] + items
|
||||
self._reset(mark)
|
||||
if (
|
||||
(named_item := self.named_item())
|
||||
):
|
||||
return [ named_item ]
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
return [named_item]
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def named_item(self) -> Optional[NamedItem]:
|
||||
# named_item: NAME '[' NAME '*' ']' '=' ~ item | NAME '[' NAME ']' '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead
|
||||
mark = self.mark()
|
||||
# named_item: NAME annotation '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead
|
||||
mark = self._mark()
|
||||
cut = False
|
||||
if (
|
||||
(name := self.name())
|
||||
and
|
||||
(literal := self.expect('['))
|
||||
(annotation := self.annotation())
|
||||
and
|
||||
(type := self.name())
|
||||
and
|
||||
(literal_1 := self.expect('*'))
|
||||
and
|
||||
(literal_2 := self.expect(']'))
|
||||
and
|
||||
(literal_3 := self.expect('='))
|
||||
(literal := self.expect('='))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(item := self.item())
|
||||
):
|
||||
return NamedItem ( name . string , item , f"{type.string}*" )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(name := self.name())
|
||||
and
|
||||
(literal := self.expect('['))
|
||||
and
|
||||
(type := self.name())
|
||||
and
|
||||
(literal_1 := self.expect(']'))
|
||||
and
|
||||
(literal_2 := self.expect('='))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(item := self.item())
|
||||
):
|
||||
return NamedItem ( name . string , item , type . string )
|
||||
self.reset(mark)
|
||||
return NamedItem ( name . string , item , annotation )
|
||||
self._reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
@ -453,35 +361,29 @@ class GeneratedParser(Parser):
|
||||
(item := self.item())
|
||||
):
|
||||
return NamedItem ( name . string , item )
|
||||
self.reset(mark)
|
||||
self._reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(item := self.item())
|
||||
):
|
||||
return NamedItem ( None , item )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(it := self.forced_atom())
|
||||
(forced := self.forced_atom())
|
||||
):
|
||||
return NamedItem ( None , it )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
return NamedItem ( None , forced )
|
||||
self._reset(mark)
|
||||
if (
|
||||
(it := self.lookahead())
|
||||
):
|
||||
return NamedItem ( None , it )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def forced_atom(self) -> Optional[NamedItem]:
|
||||
def forced_atom(self) -> Optional[Forced]:
|
||||
# forced_atom: '&' '&' ~ atom
|
||||
mark = self.mark()
|
||||
mark = self._mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect('&'))
|
||||
@ -493,14 +395,14 @@ class GeneratedParser(Parser):
|
||||
(atom := self.atom())
|
||||
):
|
||||
return Forced ( atom )
|
||||
self.reset(mark)
|
||||
self._reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def lookahead(self) -> Optional[LookaheadOrCut]:
|
||||
# lookahead: '&' ~ atom | '!' ~ atom | '~'
|
||||
mark = self.mark()
|
||||
mark = self._mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect('&'))
|
||||
@ -510,7 +412,7 @@ class GeneratedParser(Parser):
|
||||
(atom := self.atom())
|
||||
):
|
||||
return PositiveLookahead ( atom )
|
||||
self.reset(mark)
|
||||
self._reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
@ -521,21 +423,19 @@ class GeneratedParser(Parser):
|
||||
(atom := self.atom())
|
||||
):
|
||||
return NegativeLookahead ( atom )
|
||||
self.reset(mark)
|
||||
self._reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect('~'))
|
||||
):
|
||||
return Cut ( )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def item(self) -> Optional[Item]:
|
||||
# item: '[' ~ alts ']' | atom '?' | atom '*' | atom '+' | atom '.' atom '+' | atom
|
||||
mark = self.mark()
|
||||
mark = self._mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect('['))
|
||||
@ -547,36 +447,29 @@ class GeneratedParser(Parser):
|
||||
(literal_1 := self.expect(']'))
|
||||
):
|
||||
return Opt ( alts )
|
||||
self.reset(mark)
|
||||
self._reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(atom := self.atom())
|
||||
and
|
||||
(literal := self.expect('?'))
|
||||
):
|
||||
return Opt ( atom )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(atom := self.atom())
|
||||
and
|
||||
(literal := self.expect('*'))
|
||||
):
|
||||
return Repeat0 ( atom )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(atom := self.atom())
|
||||
and
|
||||
(literal := self.expect('+'))
|
||||
):
|
||||
return Repeat1 ( atom )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(sep := self.atom())
|
||||
and
|
||||
@ -587,21 +480,18 @@ class GeneratedParser(Parser):
|
||||
(literal_1 := self.expect('+'))
|
||||
):
|
||||
return Gather ( sep , node )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(atom := self.atom())
|
||||
):
|
||||
return atom
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def atom(self) -> Optional[Plain]:
|
||||
# atom: '(' ~ alts ')' | NAME | STRING
|
||||
mark = self.mark()
|
||||
mark = self._mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect('('))
|
||||
@ -613,28 +503,24 @@ class GeneratedParser(Parser):
|
||||
(literal_1 := self.expect(')'))
|
||||
):
|
||||
return Group ( alts )
|
||||
self.reset(mark)
|
||||
self._reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(name := self.name())
|
||||
):
|
||||
return NameLeaf ( name . string )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(string := self.string())
|
||||
):
|
||||
return StringLeaf ( string . string )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def action(self) -> Optional[str]:
|
||||
# action: "{" ~ target_atoms "}"
|
||||
mark = self.mark()
|
||||
mark = self._mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("{"))
|
||||
@ -646,95 +532,123 @@ class GeneratedParser(Parser):
|
||||
(literal_1 := self.expect("}"))
|
||||
):
|
||||
return target_atoms
|
||||
self.reset(mark)
|
||||
self._reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def annotation(self) -> Optional[str]:
|
||||
# annotation: "[" ~ target_atoms "]"
|
||||
mark = self._mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("["))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(target_atoms := self.target_atoms())
|
||||
and
|
||||
(literal_1 := self.expect("]"))
|
||||
):
|
||||
return target_atoms
|
||||
self._reset(mark)
|
||||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def target_atoms(self) -> Optional[str]:
|
||||
# target_atoms: target_atom target_atoms | target_atom
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
mark = self._mark()
|
||||
if (
|
||||
(target_atom := self.target_atom())
|
||||
and
|
||||
(target_atoms := self.target_atoms())
|
||||
):
|
||||
return target_atom + " " + target_atoms
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(target_atom := self.target_atom())
|
||||
):
|
||||
return target_atom
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def target_atom(self) -> Optional[str]:
|
||||
# target_atom: "{" ~ target_atoms "}" | NAME | NUMBER | STRING | "?" | ":" | !"}" OP
|
||||
mark = self.mark()
|
||||
# target_atom: "{" ~ target_atoms? "}" | "[" ~ target_atoms? "]" | NAME "*" | NAME | NUMBER | STRING | "?" | ":" | !"}" !"]" OP
|
||||
mark = self._mark()
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("{"))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(target_atoms := self.target_atoms())
|
||||
(atoms := self.target_atoms(),)
|
||||
and
|
||||
(literal_1 := self.expect("}"))
|
||||
):
|
||||
return "{" + target_atoms + "}"
|
||||
self.reset(mark)
|
||||
return "{" + ( atoms or "" ) + "}"
|
||||
self._reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(literal := self.expect("["))
|
||||
and
|
||||
(cut := True)
|
||||
and
|
||||
(atoms := self.target_atoms(),)
|
||||
and
|
||||
(literal_1 := self.expect("]"))
|
||||
):
|
||||
return "[" + ( atoms or "" ) + "]"
|
||||
self._reset(mark)
|
||||
if cut: return None
|
||||
if (
|
||||
(name := self.name())
|
||||
and
|
||||
(literal := self.expect("*"))
|
||||
):
|
||||
return name . string + "*"
|
||||
self._reset(mark)
|
||||
if (
|
||||
(name := self.name())
|
||||
):
|
||||
return name . string
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(number := self.number())
|
||||
):
|
||||
return number . string
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(string := self.string())
|
||||
):
|
||||
return string . string
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(literal := self.expect("?"))
|
||||
):
|
||||
return "?"
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
(literal := self.expect(":"))
|
||||
):
|
||||
return ":"
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
self._reset(mark)
|
||||
if (
|
||||
self.negative_lookahead(self.expect, "}")
|
||||
and
|
||||
self.negative_lookahead(self.expect, "]")
|
||||
and
|
||||
(op := self.op())
|
||||
):
|
||||
return op . string
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
KEYWORDS = ()
|
||||
SOFT_KEYWORDS = ('memo',)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from pegen.parser import simple_parser_main
|
||||
|
@ -38,7 +38,7 @@ issoftkeyword = frozenset(softkwlist).__contains__
|
||||
EXTRA_KEYWORDS = ["async", "await"]
|
||||
|
||||
|
||||
def main():
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Generate the Lib/keywords.py file from the grammar."
|
||||
)
|
||||
@ -58,9 +58,7 @@ def main():
|
||||
grammar, _, _ = build_parser(args.grammar)
|
||||
with args.tokens_file as tok_file:
|
||||
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
|
||||
gen: ParserGenerator = CParserGenerator(
|
||||
grammar, all_tokens, exact_tok, non_exact_tok, file=None
|
||||
)
|
||||
gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None)
|
||||
gen.collect_todo()
|
||||
|
||||
with args.keyword_file as thefile:
|
||||
@ -68,7 +66,9 @@ def main():
|
||||
all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords)
|
||||
|
||||
keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords))
|
||||
soft_keywords = "" if not all_soft_keywords else " " + ",\n ".join(map(repr, all_soft_keywords))
|
||||
soft_keywords = (
|
||||
"" if not all_soft_keywords else " " + ",\n ".join(map(repr, all_soft_keywords))
|
||||
)
|
||||
thefile.write(TEMPLATE.format(keywords=keywords, soft_keywords=soft_keywords))
|
||||
|
||||
|
||||
|
@ -57,13 +57,12 @@ rule[Rule]:
|
||||
| rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) }
|
||||
|
||||
rulename[RuleName]:
|
||||
| NAME '[' type=NAME '*' ']' { (name.string, type.string+"*") }
|
||||
| NAME '[' type=NAME ']' { (name.string, type.string) }
|
||||
| NAME annotation { (name.string, annotation) }
|
||||
| NAME { (name.string, None) }
|
||||
|
||||
# In the future this may return something more complicated
|
||||
memoflag[str]:
|
||||
| '(' 'memo' ')' { "memo" }
|
||||
| '(' "memo" ')' { "memo" }
|
||||
|
||||
alts[Rhs]:
|
||||
| alt "|" alts { Rhs([alt] + alts.alts)}
|
||||
@ -84,14 +83,13 @@ items[NamedItemList]:
|
||||
| named_item { [named_item] }
|
||||
|
||||
named_item[NamedItem]:
|
||||
| NAME '[' type=NAME '*' ']' '=' ~ item {NamedItem(name.string, item, f"{type.string}*")}
|
||||
| NAME '[' type=NAME ']' '=' ~ item {NamedItem(name.string, item, type.string)}
|
||||
| NAME annotation '=' ~ item {NamedItem(name.string, item, annotation)}
|
||||
| NAME '=' ~ item {NamedItem(name.string, item)}
|
||||
| item {NamedItem(None, item)}
|
||||
| it=forced_atom {NamedItem(None, it)}
|
||||
| forced=forced_atom {NamedItem(None, forced)}
|
||||
| it=lookahead {NamedItem(None, it)}
|
||||
|
||||
forced_atom[NamedItem]:
|
||||
forced_atom[Forced]:
|
||||
| '&''&' ~ atom {Forced(atom)}
|
||||
|
||||
lookahead[LookaheadOrCut]:
|
||||
@ -112,19 +110,22 @@ atom[Plain]:
|
||||
| NAME {NameLeaf(name.string) }
|
||||
| STRING {StringLeaf(string.string)}
|
||||
|
||||
# Mini-grammar for the actions
|
||||
# Mini-grammar for the actions and annotations
|
||||
|
||||
action[str]: "{" ~ target_atoms "}" { target_atoms }
|
||||
annotation[str]: "[" ~ target_atoms "]" { target_atoms }
|
||||
|
||||
target_atoms[str]:
|
||||
| target_atom target_atoms { target_atom + " " + target_atoms }
|
||||
| target_atom { target_atom }
|
||||
|
||||
target_atom[str]:
|
||||
| "{" ~ target_atoms "}" { "{" + target_atoms + "}" }
|
||||
| "{" ~ atoms=target_atoms? "}" { "{" + (atoms or "") + "}" }
|
||||
| "[" ~ atoms=target_atoms? "]" { "[" + (atoms or "") + "]" }
|
||||
| NAME "*" { name.string + "*" }
|
||||
| NAME { name.string }
|
||||
| NUMBER { number.string }
|
||||
| STRING { string.string }
|
||||
| "?" { "?" }
|
||||
| ":" { ":" }
|
||||
| !"}" OP { op.string }
|
||||
| !"}" !"]" OP { op.string }
|
||||
|
@ -4,13 +4,10 @@ import time
|
||||
import token
|
||||
import tokenize
|
||||
import traceback
|
||||
|
||||
from abc import abstractmethod
|
||||
from typing import Any, Callable, cast, Dict, Optional, Tuple, Type, TypeVar
|
||||
from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, cast
|
||||
|
||||
from pegen.tokenizer import exact_token_types
|
||||
from pegen.tokenizer import Mark
|
||||
from pegen.tokenizer import Tokenizer
|
||||
from pegen.tokenizer import Mark, Tokenizer, exact_token_types
|
||||
|
||||
T = TypeVar("T")
|
||||
P = TypeVar("P", bound="Parser")
|
||||
@ -45,12 +42,12 @@ def memoize(method: F) -> F:
|
||||
method_name = method.__name__
|
||||
|
||||
def memoize_wrapper(self: P, *args: object) -> T:
|
||||
mark = self.mark()
|
||||
mark = self._mark()
|
||||
key = mark, method_name, args
|
||||
# Fast path: cache hit, and not verbose.
|
||||
if key in self._cache and not self._verbose:
|
||||
tree, endmark = self._cache[key]
|
||||
self.reset(endmark)
|
||||
self._reset(endmark)
|
||||
return tree
|
||||
# Slow path: no cache hit, or verbose.
|
||||
verbose = self._verbose
|
||||
@ -64,13 +61,13 @@ def memoize(method: F) -> F:
|
||||
self._level -= 1
|
||||
if verbose:
|
||||
print(f"{fill}... {method_name}({argsr}) -> {tree!s:.200}")
|
||||
endmark = self.mark()
|
||||
endmark = self._mark()
|
||||
self._cache[key] = tree, endmark
|
||||
else:
|
||||
tree, endmark = self._cache[key]
|
||||
if verbose:
|
||||
print(f"{fill}{method_name}({argsr}) -> {tree!s:.200}")
|
||||
self.reset(endmark)
|
||||
self._reset(endmark)
|
||||
return tree
|
||||
|
||||
memoize_wrapper.__wrapped__ = method # type: ignore
|
||||
@ -82,12 +79,12 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
|
||||
method_name = method.__name__
|
||||
|
||||
def memoize_left_rec_wrapper(self: P) -> Optional[T]:
|
||||
mark = self.mark()
|
||||
mark = self._mark()
|
||||
key = mark, method_name, ()
|
||||
# Fast path: cache hit, and not verbose.
|
||||
if key in self._cache and not self._verbose:
|
||||
tree, endmark = self._cache[key]
|
||||
self.reset(endmark)
|
||||
self._reset(endmark)
|
||||
return tree
|
||||
# Slow path: no cache hit, or verbose.
|
||||
verbose = self._verbose
|
||||
@ -113,9 +110,13 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
|
||||
print(f"{fill}Recursive {method_name} at {mark} depth {depth}")
|
||||
|
||||
while True:
|
||||
self.reset(mark)
|
||||
result = method(self)
|
||||
endmark = self.mark()
|
||||
self._reset(mark)
|
||||
self.in_recursive_rule += 1
|
||||
try:
|
||||
result = method(self)
|
||||
finally:
|
||||
self.in_recursive_rule -= 1
|
||||
endmark = self._mark()
|
||||
depth += 1
|
||||
if verbose:
|
||||
print(
|
||||
@ -131,24 +132,24 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
|
||||
break
|
||||
self._cache[key] = lastresult, lastmark = result, endmark
|
||||
|
||||
self.reset(lastmark)
|
||||
self._reset(lastmark)
|
||||
tree = lastresult
|
||||
|
||||
self._level -= 1
|
||||
if verbose:
|
||||
print(f"{fill}{method_name}() -> {tree!s:.200} [cached]")
|
||||
if tree:
|
||||
endmark = self.mark()
|
||||
endmark = self._mark()
|
||||
else:
|
||||
endmark = mark
|
||||
self.reset(endmark)
|
||||
self._reset(endmark)
|
||||
self._cache[key] = tree, endmark
|
||||
else:
|
||||
tree, endmark = self._cache[key]
|
||||
if verbose:
|
||||
print(f"{fill}{method_name}() -> {tree!s:.200} [fresh]")
|
||||
if tree:
|
||||
self.reset(endmark)
|
||||
self._reset(endmark)
|
||||
return tree
|
||||
|
||||
memoize_left_rec_wrapper.__wrapped__ = method # type: ignore
|
||||
@ -158,15 +159,21 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
|
||||
class Parser:
|
||||
"""Parsing base class."""
|
||||
|
||||
KEYWORDS: ClassVar[Tuple[str, ...]]
|
||||
|
||||
SOFT_KEYWORDS: ClassVar[Tuple[str, ...]]
|
||||
|
||||
def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False):
|
||||
self._tokenizer = tokenizer
|
||||
self._verbose = verbose
|
||||
self._level = 0
|
||||
self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {}
|
||||
# Integer tracking whether we are in a left recursive rule or not. Can be useful
|
||||
# for error reporting.
|
||||
self.in_recursive_rule = 0
|
||||
# Pass through common tokenizer methods.
|
||||
# TODO: Rename to _mark and _reset.
|
||||
self.mark = self._tokenizer.mark
|
||||
self.reset = self._tokenizer.reset
|
||||
self._mark = self._tokenizer.mark
|
||||
self._reset = self._tokenizer.reset
|
||||
|
||||
@abstractmethod
|
||||
def start(self) -> Any:
|
||||
@ -179,7 +186,7 @@ class Parser:
|
||||
@memoize
|
||||
def name(self) -> Optional[tokenize.TokenInfo]:
|
||||
tok = self._tokenizer.peek()
|
||||
if tok.type == token.NAME:
|
||||
if tok.type == token.NAME and tok.string not in self.KEYWORDS:
|
||||
return self._tokenizer.getnext()
|
||||
return None
|
||||
|
||||
@ -204,6 +211,20 @@ class Parser:
|
||||
return self._tokenizer.getnext()
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def type_comment(self) -> Optional[tokenize.TokenInfo]:
|
||||
tok = self._tokenizer.peek()
|
||||
if tok.type == token.TYPE_COMMENT:
|
||||
return self._tokenizer.getnext()
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def soft_keyword(self) -> Optional[tokenize.TokenInfo]:
|
||||
tok = self._tokenizer.peek()
|
||||
if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS:
|
||||
return self._tokenizer.getnext()
|
||||
return None
|
||||
|
||||
@memoize
|
||||
def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
|
||||
tok = self._tokenizer.peek()
|
||||
@ -219,23 +240,26 @@ class Parser:
|
||||
return self._tokenizer.getnext()
|
||||
return None
|
||||
|
||||
def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]:
|
||||
if res is None:
|
||||
raise self.make_syntax_error(f"expected {expectation}")
|
||||
return res
|
||||
|
||||
def positive_lookahead(self, func: Callable[..., T], *args: object) -> T:
|
||||
mark = self.mark()
|
||||
mark = self._mark()
|
||||
ok = func(*args)
|
||||
self.reset(mark)
|
||||
self._reset(mark)
|
||||
return ok
|
||||
|
||||
def negative_lookahead(self, func: Callable[..., object], *args: object) -> bool:
|
||||
mark = self.mark()
|
||||
mark = self._mark()
|
||||
ok = func(*args)
|
||||
self.reset(mark)
|
||||
self._reset(mark)
|
||||
return not ok
|
||||
|
||||
def make_syntax_error(self, filename: str = "<unknown>") -> SyntaxError:
|
||||
def make_syntax_error(self, message: str, filename: str = "<unknown>") -> SyntaxError:
|
||||
tok = self._tokenizer.diagnose()
|
||||
return SyntaxError(
|
||||
"pegen parse failure", (filename, tok.start[0], 1 + tok.start[1], tok.line)
|
||||
)
|
||||
return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line))
|
||||
|
||||
|
||||
def simple_parser_main(parser_class: Type[Parser]) -> None:
|
||||
|
@ -1,30 +1,29 @@
|
||||
import contextlib
|
||||
from abc import abstractmethod
|
||||
|
||||
from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple
|
||||
from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple
|
||||
|
||||
from pegen import sccutils
|
||||
from pegen.grammar import (
|
||||
Grammar,
|
||||
Rule,
|
||||
Rhs,
|
||||
Alt,
|
||||
NamedItem,
|
||||
Plain,
|
||||
NameLeaf,
|
||||
Gather,
|
||||
Grammar,
|
||||
GrammarError,
|
||||
GrammarVisitor,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
Plain,
|
||||
Rhs,
|
||||
Rule,
|
||||
)
|
||||
from pegen.grammar import GrammarError, GrammarVisitor
|
||||
|
||||
|
||||
class RuleCheckingVisitor(GrammarVisitor):
|
||||
def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]):
|
||||
def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
|
||||
self.rules = rules
|
||||
self.tokens = tokens
|
||||
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> None:
|
||||
if node.value not in self.rules and node.value not in self.tokens.values():
|
||||
# TODO: Add line/col info to (leaf) nodes
|
||||
if node.value not in self.rules and node.value not in self.tokens:
|
||||
raise GrammarError(f"Dangling reference to rule {node.value!r}")
|
||||
|
||||
def visit_NamedItem(self, node: NamedItem) -> None:
|
||||
@ -37,7 +36,7 @@ class ParserGenerator:
|
||||
|
||||
callmakervisitor: GrammarVisitor
|
||||
|
||||
def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]):
|
||||
def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
|
||||
self.grammar = grammar
|
||||
self.tokens = tokens
|
||||
self.rules = grammar.rules
|
||||
@ -133,13 +132,22 @@ class ParserGenerator:
|
||||
self.counter += 1
|
||||
extra_function_name = f"_loop0_{self.counter}"
|
||||
extra_function_alt = Alt(
|
||||
[NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
|
||||
[NamedItem(None, node.separator), NamedItem("elem", node.node)],
|
||||
action="elem",
|
||||
)
|
||||
self.todo[extra_function_name] = Rule(
|
||||
extra_function_name, None, Rhs([extra_function_alt]),
|
||||
extra_function_name,
|
||||
None,
|
||||
Rhs([extra_function_alt]),
|
||||
)
|
||||
alt = Alt(
|
||||
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
|
||||
)
|
||||
self.todo[name] = Rule(
|
||||
name,
|
||||
None,
|
||||
Rhs([alt]),
|
||||
)
|
||||
alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
|
||||
self.todo[name] = Rule(name, None, Rhs([alt]),)
|
||||
return name
|
||||
|
||||
def dedupe(self, name: str) -> str:
|
||||
|
@ -1,25 +1,28 @@
|
||||
import ast
|
||||
import re
|
||||
import token
|
||||
from typing import Any, Dict, Optional, IO, Text, Tuple
|
||||
from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple
|
||||
|
||||
from pegen import grammar
|
||||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Forced,
|
||||
Gather,
|
||||
GrammarVisitor,
|
||||
NameLeaf,
|
||||
StringLeaf,
|
||||
Rhs,
|
||||
NamedItem,
|
||||
Group,
|
||||
Lookahead,
|
||||
PositiveLookahead,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
NegativeLookahead,
|
||||
Opt,
|
||||
PositiveLookahead,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Gather,
|
||||
Group,
|
||||
Rhs,
|
||||
Rule,
|
||||
Alt,
|
||||
StringLeaf,
|
||||
)
|
||||
from pegen import grammar
|
||||
from pegen.parser_generator import ParserGenerator
|
||||
|
||||
MODULE_PREFIX = """\
|
||||
@ -27,7 +30,10 @@ MODULE_PREFIX = """\
|
||||
# @generated by pegen from {filename}
|
||||
|
||||
import ast
|
||||
from typing import Optional, Any
|
||||
import sys
|
||||
import tokenize
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
from pegen.parser import memoize, memoize_left_rec, logger, Parser
|
||||
|
||||
@ -36,25 +42,81 @@ MODULE_SUFFIX = """
|
||||
|
||||
if __name__ == '__main__':
|
||||
from pegen.parser import simple_parser_main
|
||||
simple_parser_main(GeneratedParser)
|
||||
simple_parser_main({class_name})
|
||||
"""
|
||||
|
||||
|
||||
class InvalidNodeVisitor(GrammarVisitor):
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> bool:
|
||||
name = node.value
|
||||
return name.startswith("invalid")
|
||||
|
||||
def visit_StringLeaf(self, node: StringLeaf) -> bool:
|
||||
return False
|
||||
|
||||
def visit_NamedItem(self, node: NamedItem) -> bool:
|
||||
return self.visit(node.item)
|
||||
|
||||
def visit_Rhs(self, node: Rhs) -> bool:
|
||||
return any(self.visit(alt) for alt in node.alts)
|
||||
|
||||
def visit_Alt(self, node: Alt) -> bool:
|
||||
return any(self.visit(item) for item in node.items)
|
||||
|
||||
def lookahead_call_helper(self, node: Lookahead) -> bool:
|
||||
return self.visit(node.node)
|
||||
|
||||
def visit_PositiveLookahead(self, node: PositiveLookahead) -> bool:
|
||||
return self.lookahead_call_helper(node)
|
||||
|
||||
def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool:
|
||||
return self.lookahead_call_helper(node)
|
||||
|
||||
def visit_Opt(self, node: Opt) -> bool:
|
||||
return self.visit(node.node)
|
||||
|
||||
def visit_Repeat(self, node: Repeat0) -> Tuple[str, str]:
|
||||
return self.visit(node.node)
|
||||
|
||||
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
|
||||
return self.visit(node.node)
|
||||
|
||||
def visit_Group(self, node: Group) -> bool:
|
||||
return self.visit(node.rhs)
|
||||
|
||||
def visit_Cut(self, node: Cut) -> bool:
|
||||
return False
|
||||
|
||||
def visit_Forced(self, node: Forced) -> bool:
|
||||
return self.visit(node.node)
|
||||
|
||||
|
||||
class PythonCallMakerVisitor(GrammarVisitor):
|
||||
def __init__(self, parser_generator: ParserGenerator):
|
||||
self.gen = parser_generator
|
||||
self.cache: Dict[Any, Any] = {}
|
||||
self.keywords: Set[str] = set()
|
||||
self.soft_keywords: Set[str] = set()
|
||||
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
|
||||
name = node.value
|
||||
if name in ("NAME", "NUMBER", "STRING", "OP"):
|
||||
if name == "SOFT_KEYWORD":
|
||||
return "soft_keyword", "self.soft_keyword()"
|
||||
if name in ("NAME", "NUMBER", "STRING", "OP", "TYPE_COMMENT"):
|
||||
name = name.lower()
|
||||
return name, f"self.{name}()"
|
||||
if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
|
||||
return name.lower(), f"self.expect({name!r})"
|
||||
# Avoid using names that can be Python keywords
|
||||
return "_" + name.lower(), f"self.expect({name!r})"
|
||||
return name, f"self.{name}()"
|
||||
|
||||
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
|
||||
val = ast.literal_eval(node.value)
|
||||
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
|
||||
if node.value.endswith("'"):
|
||||
self.keywords.add(val)
|
||||
else:
|
||||
self.soft_keywords.add(val)
|
||||
return "literal", f"self.expect({node.value})"
|
||||
|
||||
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
|
||||
@ -125,16 +187,36 @@ class PythonCallMakerVisitor(GrammarVisitor):
|
||||
def visit_Cut(self, node: Cut) -> Tuple[str, str]:
|
||||
return "cut", "True"
|
||||
|
||||
def visit_Forced(self, node: Forced) -> Tuple[str, str]:
|
||||
if isinstance(node.node, Group):
|
||||
_, val = self.visit(node.node.rhs)
|
||||
return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')"
|
||||
else:
|
||||
return (
|
||||
"forced",
|
||||
f"self.expect_forced(self.expect({node.node.value}), {node.node.value!r})",
|
||||
)
|
||||
|
||||
|
||||
class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
def __init__(
|
||||
self,
|
||||
grammar: grammar.Grammar,
|
||||
file: Optional[IO[Text]],
|
||||
tokens: Dict[int, str] = token.tok_name,
|
||||
tokens: Set[str] = set(token.tok_name.values()),
|
||||
location_formatting: Optional[str] = None,
|
||||
unreachable_formatting: Optional[str] = None,
|
||||
):
|
||||
tokens.add("SOFT_KEYWORD")
|
||||
super().__init__(grammar, tokens, file)
|
||||
self.callmakervisitor = PythonCallMakerVisitor(self)
|
||||
self.callmakervisitor: PythonCallMakerVisitor = PythonCallMakerVisitor(self)
|
||||
self.invalidvisitor: InvalidNodeVisitor = InvalidNodeVisitor()
|
||||
self.unreachable_formatting = unreachable_formatting or "None # pragma: no cover"
|
||||
self.location_formatting = (
|
||||
location_formatting
|
||||
or "lineno=start_lineno, col_offset=start_col_offset, "
|
||||
"end_lineno=end_lineno, end_col_offset=end_col_offset"
|
||||
)
|
||||
|
||||
def generate(self, filename: str) -> None:
|
||||
header = self.grammar.metas.get("header", MODULE_PREFIX)
|
||||
@ -142,18 +224,35 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
self.print(header.rstrip("\n").format(filename=filename))
|
||||
subheader = self.grammar.metas.get("subheader", "")
|
||||
if subheader:
|
||||
self.print(subheader.format(filename=filename))
|
||||
self.print("class GeneratedParser(Parser):")
|
||||
self.print(subheader)
|
||||
cls_name = self.grammar.metas.get("class", "GeneratedParser")
|
||||
self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
|
||||
self.print(f"class {cls_name}(Parser):")
|
||||
while self.todo:
|
||||
for rulename, rule in list(self.todo.items()):
|
||||
del self.todo[rulename]
|
||||
self.print()
|
||||
with self.indent():
|
||||
self.visit(rule)
|
||||
trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX)
|
||||
|
||||
self.print()
|
||||
with self.indent():
|
||||
self.print(f"KEYWORDS = {tuple(self.callmakervisitor.keywords)}")
|
||||
self.print(f"SOFT_KEYWORDS = {tuple(self.callmakervisitor.soft_keywords)}")
|
||||
|
||||
trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
|
||||
if trailer is not None:
|
||||
self.print(trailer.rstrip("\n"))
|
||||
|
||||
def alts_uses_locations(self, alts: Sequence[Alt]) -> bool:
|
||||
for alt in alts:
|
||||
if alt.action and "LOCATIONS" in alt.action:
|
||||
return True
|
||||
for n in alt.items:
|
||||
if isinstance(n.item, Group) and self.alts_uses_locations(n.item.rhs.alts):
|
||||
return True
|
||||
return False
|
||||
|
||||
def visit_Rule(self, node: Rule) -> None:
|
||||
is_loop = node.is_loop()
|
||||
is_gather = node.is_gather()
|
||||
@ -173,7 +272,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
self.print(f"# {node.name}: {rhs}")
|
||||
if node.nullable:
|
||||
self.print(f"# nullable={node.nullable}")
|
||||
self.print("mark = self.mark()")
|
||||
self.print("mark = self._mark()")
|
||||
if self.alts_uses_locations(node.rhs.alts):
|
||||
self.print("tok = self._tokenizer.peek()")
|
||||
self.print("start_lineno, start_col_offset = tok.start")
|
||||
if is_loop:
|
||||
self.print("children = []")
|
||||
self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
|
||||
@ -200,8 +302,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
self.visit(alt, is_loop=is_loop, is_gather=is_gather)
|
||||
|
||||
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
|
||||
has_cut = any(isinstance(item.item, Cut) for item in node.items)
|
||||
with self.local_variable_context():
|
||||
self.print("cut = False") # TODO: Only if needed.
|
||||
if has_cut:
|
||||
self.print("cut = False")
|
||||
if is_loop:
|
||||
self.print("while (")
|
||||
else:
|
||||
@ -227,12 +331,26 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
|
||||
)
|
||||
else:
|
||||
action = f"[{', '.join(self.local_variable_names)}]"
|
||||
if self.invalidvisitor.visit(node):
|
||||
action = "UNREACHABLE"
|
||||
elif len(self.local_variable_names) == 1:
|
||||
action = f"{self.local_variable_names[0]}"
|
||||
else:
|
||||
action = f"[{', '.join(self.local_variable_names)}]"
|
||||
elif "LOCATIONS" in action:
|
||||
self.print("tok = self._tokenizer.get_last_non_whitespace_token()")
|
||||
self.print("end_lineno, end_col_offset = tok.end")
|
||||
action = action.replace("LOCATIONS", self.location_formatting)
|
||||
|
||||
if is_loop:
|
||||
self.print(f"children.append({action})")
|
||||
self.print(f"mark = self.mark()")
|
||||
self.print(f"mark = self._mark()")
|
||||
else:
|
||||
if "UNREACHABLE" in action:
|
||||
action = action.replace("UNREACHABLE", self.unreachable_formatting)
|
||||
self.print(f"return {action}")
|
||||
self.print("self.reset(mark)")
|
||||
|
||||
self.print("self._reset(mark)")
|
||||
# Skip remaining alternatives if a cut was reached.
|
||||
self.print("if cut: return None") # TODO: Only if needed.
|
||||
if has_cut:
|
||||
self.print("if cut: return None")
|
||||
|
@ -18,7 +18,7 @@ from pegen.python_generator import PythonParserGenerator
|
||||
from pegen.tokenizer import Tokenizer
|
||||
|
||||
ALL_TOKENS = token.tok_name
|
||||
EXACT_TOKENS = token.EXACT_TOKEN_TYPES # type: ignore
|
||||
EXACT_TOKENS = token.EXACT_TOKEN_TYPES
|
||||
NON_EXACT_TOKENS = {
|
||||
name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
|
||||
}
|
||||
@ -42,7 +42,7 @@ def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = F
|
||||
parser = parser_class(tokenizer, verbose=verbose)
|
||||
result = parser.start()
|
||||
if result is None:
|
||||
raise parser.make_syntax_error()
|
||||
raise parser.make_syntax_error("invalid syntax")
|
||||
return result
|
||||
|
||||
|
||||
@ -66,6 +66,7 @@ def import_file(full_name: str, path: str) -> Any:
|
||||
"""Import a python module from a path"""
|
||||
|
||||
spec = importlib.util.spec_from_file_location(full_name, path)
|
||||
assert spec is not None
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
|
||||
# We assume this is not None and has an exec_module() method.
|
||||
|
@ -1,10 +1,10 @@
|
||||
import token
|
||||
import tokenize
|
||||
from typing import List, Iterator
|
||||
from typing import Dict, Iterator, List
|
||||
|
||||
Mark = int # NewType('Mark', int)
|
||||
|
||||
exact_token_types = token.EXACT_TOKEN_TYPES # type: ignore
|
||||
exact_token_types = token.EXACT_TOKEN_TYPES
|
||||
|
||||
|
||||
def shorttok(tok: tokenize.TokenInfo) -> str:
|
||||
@ -19,26 +19,22 @@ class Tokenizer:
|
||||
|
||||
_tokens: List[tokenize.TokenInfo]
|
||||
|
||||
def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False):
|
||||
def __init__(
|
||||
self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
|
||||
):
|
||||
self._tokengen = tokengen
|
||||
self._tokens = []
|
||||
self._index = 0
|
||||
self._verbose = verbose
|
||||
self._lines: Dict[int, str] = {}
|
||||
self._path = path
|
||||
if verbose:
|
||||
self.report(False, False)
|
||||
|
||||
def getnext(self) -> tokenize.TokenInfo:
|
||||
"""Return the next token and updates the index."""
|
||||
cached = True
|
||||
while self._index == len(self._tokens):
|
||||
tok = next(self._tokengen)
|
||||
if tok.type in (tokenize.NL, tokenize.COMMENT):
|
||||
continue
|
||||
if tok.type == token.ERRORTOKEN and tok.string.isspace():
|
||||
continue
|
||||
self._tokens.append(tok)
|
||||
cached = False
|
||||
tok = self._tokens[self._index]
|
||||
cached = not self._index == len(self._tokens)
|
||||
tok = self.peek()
|
||||
self._index += 1
|
||||
if self._verbose:
|
||||
self.report(cached, False)
|
||||
@ -52,7 +48,15 @@ class Tokenizer:
|
||||
continue
|
||||
if tok.type == token.ERRORTOKEN and tok.string.isspace():
|
||||
continue
|
||||
if (
|
||||
tok.type == token.NEWLINE
|
||||
and self._tokens
|
||||
and self._tokens[-1].type == token.NEWLINE
|
||||
):
|
||||
continue
|
||||
self._tokens.append(tok)
|
||||
if not self._path:
|
||||
self._lines[tok.start[0]] = tok.line
|
||||
return self._tokens[self._index]
|
||||
|
||||
def diagnose(self) -> tokenize.TokenInfo:
|
||||
@ -60,6 +64,34 @@ class Tokenizer:
|
||||
self.getnext()
|
||||
return self._tokens[-1]
|
||||
|
||||
def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
|
||||
for tok in reversed(self._tokens[: self._index]):
|
||||
if tok.type != tokenize.ENDMARKER and (
|
||||
tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
|
||||
):
|
||||
break
|
||||
return tok
|
||||
|
||||
def get_lines(self, line_numbers: List[int]) -> List[str]:
|
||||
"""Retrieve source lines corresponding to line numbers."""
|
||||
if self._lines:
|
||||
lines = self._lines
|
||||
else:
|
||||
n = len(line_numbers)
|
||||
lines = {}
|
||||
count = 0
|
||||
seen = 0
|
||||
with open(self._path) as f:
|
||||
for l in f:
|
||||
count += 1
|
||||
if count in line_numbers:
|
||||
seen += 1
|
||||
lines[count] = l
|
||||
if seen == n:
|
||||
break
|
||||
|
||||
return [lines[n] for n in line_numbers]
|
||||
|
||||
def mark(self) -> Mark:
|
||||
return self._index
|
||||
|
||||
|
@ -1,51 +1,45 @@
|
||||
from typing import Optional
|
||||
|
||||
from pegen import grammar
|
||||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Gather,
|
||||
GrammarVisitor,
|
||||
Group,
|
||||
Lookahead,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
NegativeLookahead,
|
||||
Opt,
|
||||
PositiveLookahead,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Rhs,
|
||||
Rule,
|
||||
StringLeaf,
|
||||
Rhs,
|
||||
)
|
||||
|
||||
|
||||
class ValidationError(Exception):
|
||||
pass
|
||||
|
||||
class GrammarValidator(GrammarVisitor):
|
||||
def __init__(self, grammar: grammar.Grammar):
|
||||
self.grammar = grammar
|
||||
self.rulename = None
|
||||
|
||||
def validate_rule(self, rulename: str, node: Rule):
|
||||
class GrammarValidator(GrammarVisitor):
|
||||
def __init__(self, grammar: grammar.Grammar) -> None:
|
||||
self.grammar = grammar
|
||||
self.rulename: Optional[str] = None
|
||||
|
||||
def validate_rule(self, rulename: str, node: Rule) -> None:
|
||||
self.rulename = rulename
|
||||
self.visit(node)
|
||||
self.rulename = None
|
||||
|
||||
|
||||
class SubRuleValidator(GrammarValidator):
|
||||
def visit_Rhs(self, node: Rule):
|
||||
def visit_Rhs(self, node: Rhs) -> None:
|
||||
for index, alt in enumerate(node.alts):
|
||||
alts_to_consider = node.alts[index+1:]
|
||||
alts_to_consider = node.alts[index + 1 :]
|
||||
for other_alt in alts_to_consider:
|
||||
self.check_intersection(alt, other_alt)
|
||||
|
||||
def check_intersection(self, first_alt: Alt, second_alt: Alt) -> bool:
|
||||
def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
|
||||
if str(second_alt).startswith(str(first_alt)):
|
||||
raise ValidationError(
|
||||
f"In {self.rulename} there is an alternative that will "
|
||||
f"never be visited:\n{second_alt}")
|
||||
f"In {self.rulename} there is an alternative that will "
|
||||
f"never be visited:\n{second_alt}"
|
||||
)
|
||||
|
||||
def validate_grammar(the_grammar: grammar.Grammar):
|
||||
|
||||
def validate_grammar(the_grammar: grammar.Grammar) -> None:
|
||||
for validator_cls in GrammarValidator.__subclasses__():
|
||||
validator = validator_cls(the_grammar)
|
||||
for rule_name, rule in the_grammar.rules.items():
|
||||
|
@ -76,7 +76,10 @@ def run_benchmark_stdlib(subcommand):
|
||||
parse_directory(
|
||||
"../../Lib",
|
||||
verbose=False,
|
||||
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
|
||||
excluded_files=[
|
||||
"*/bad*",
|
||||
"*/lib2to3/tests/data/*",
|
||||
],
|
||||
short=True,
|
||||
mode=modes[subcommand],
|
||||
)
|
||||
|
@ -8,7 +8,8 @@ from typing import Dict, Any
|
||||
from urllib.request import urlretrieve
|
||||
|
||||
argparser = argparse.ArgumentParser(
|
||||
prog="download_pypi_packages", description="Helper program to download PyPI packages",
|
||||
prog="download_pypi_packages",
|
||||
description="Helper program to download PyPI packages",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"-n", "--number", type=int, default=100, help="Number of packages to download"
|
||||
|
@ -41,7 +41,10 @@ from pegen.grammar import (
|
||||
Rhs,
|
||||
)
|
||||
|
||||
argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
|
||||
argparser = argparse.ArgumentParser(
|
||||
prog="graph_grammar",
|
||||
description="Graph a grammar tree",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"-s",
|
||||
"--start",
|
||||
|
@ -19,7 +19,8 @@ from scripts import test_parse_directory
|
||||
HERE = pathlib.Path(__file__).resolve().parent
|
||||
|
||||
argparser = argparse.ArgumentParser(
|
||||
prog="test_pypi_packages", description="Helper program to test parsing PyPI packages",
|
||||
prog="test_pypi_packages",
|
||||
description="Helper program to test parsing PyPI packages",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
|
||||
|
Loading…
Reference in New Issue
Block a user