2008-03-19 13:04:44 +08:00
|
|
|
# Copyright 2006 Google, Inc. All Rights Reserved.
|
|
|
|
# Licensed to PSF under a Contributor Agreement.
|
|
|
|
|
|
|
|
"""Pattern compiler.
|
|
|
|
|
|
|
|
The grammar is taken from PatternGrammar.txt.
|
|
|
|
|
|
|
|
The compiler compiles a pattern to a pytree.*Pattern instance.
|
|
|
|
"""
|
|
|
|
|
|
|
|
__author__ = "Guido van Rossum <guido@python.org>"
|
|
|
|
|
|
|
|
# Python imports
|
|
|
|
import os
|
|
|
|
|
|
|
|
# Fairly local imports
|
|
|
|
from .pgen2 import driver
|
|
|
|
from .pgen2 import literals
|
|
|
|
from .pgen2 import token
|
|
|
|
from .pgen2 import tokenize
|
|
|
|
|
|
|
|
# Really local imports
|
|
|
|
from . import pytree
|
|
|
|
from . import pygram
|
|
|
|
|
|
|
|
# Path of the pattern grammar file: "PatternGrammar.txt" located in the
# same directory as this module.
_PATTERN_GRAMMAR_FILE = os.path.join(
    os.path.dirname(__file__),
    "PatternGrammar.txt",
)
|
|
|
|
|
|
|
|
|
|
|
|
def tokenize_wrapper(input):
    """Tokenize a string, dropping the whitespace-structure tokens.

    This is a generator.  Each item produced by
    ``tokenize.generate_tokens`` is a 5-tuple whose first element is the
    token type; items whose type is NEWLINE, INDENT or DEDENT are
    discarded and every other item is yielded unchanged.
    """
    ignored = {token.NEWLINE, token.INDENT, token.DEDENT}
    # The tokenizer pulls its input through a readline-style callable.
    readline = driver.generate_lines(input).__next__
    for tok in tokenize.generate_tokens(readline):
        tok_type, _, _, _, _ = tok
        if tok_type in ignored:
            continue
        yield tok
|
|
|
|
|
|
|
|
|
|
|
|
class PatternCompiler(object):
    """Compiles pattern strings into nested pytree.*Pattern objects.

    The pattern source is tokenized with tokenize_wrapper(), parsed with a
    driver built from the pattern grammar, and the resulting parse tree is
    translated node by node into pattern objects.
    """

    def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
        """Initializer.

        Takes an optional alternative filename for the pattern grammar.
        """
        self.grammar = driver.load_grammar(grammar_file)
        self.syms = pygram.Symbols(self.grammar)
        self.pygrammar = pygram.python_grammar
        self.pysyms = pygram.python_symbols
        self.driver = driver.Driver(self.grammar, convert=pattern_convert)

    def compile_pattern(self, input, debug=False):
        """Compiles a pattern string to a nested pytree.*Pattern object.

        *input* is the pattern source text; *debug* is forwarded to the
        driver's parse_tokens().
        """
        tokens = tokenize_wrapper(input)
        root = self.driver.parse_tokens(tokens, debug=debug)
        return self.compile_node(root)

    def compile_node(self, node):
        """Compiles a node, recursively.

        This is one big switch on the node type (Matcher, Alternatives,
        Alternative, NegatedUnit or Unit) and returns the optimized
        pattern object for that node.
        """
        # XXX Optimize certain Wildcard-containing-Wildcard patterns
        # that can be merged
        if node.type == self.syms.Matcher:
            node = node.children[0]  # Avoid unneeded recursion

        if node.type == self.syms.Alternatives:
            # Skip the odd children since they are just '|' tokens
            alts = [self.compile_node(ch) for ch in node.children[::2]]
            if len(alts) == 1:
                return alts[0]
            p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
            return p.optimize()

        if node.type == self.syms.Alternative:
            units = [self.compile_node(ch) for ch in node.children]
            if len(units) == 1:
                return units[0]
            p = pytree.WildcardPattern([units], min=1, max=1)
            return p.optimize()

        if node.type == self.syms.NegatedUnit:
            # children[0] is skipped; the rest is the unit being negated.
            pattern = self.compile_basic(node.children[1:])
            p = pytree.NegatedPattern(pattern)
            return p.optimize()

        assert node.type == self.syms.Unit

        # Peel off an optional leading "name =" binding ...
        name = None
        nodes = node.children
        if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
            name = nodes[0].value
            nodes = nodes[2:]
        # ... and an optional trailing Repeater.
        repeat = None
        if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
            repeat = nodes[-1]
            nodes = nodes[:-1]

        # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
        pattern = self.compile_basic(nodes, repeat)

        if repeat is not None:
            min_count, max_count = self._repeat_bounds(repeat)
            # Exactly-once repetition needs no wrapper.
            if min_count != 1 or max_count != 1:
                pattern = pattern.optimize()
                pattern = pytree.WildcardPattern([[pattern]],
                                                 min=min_count, max=max_count)

        if name is not None:
            pattern.name = name
        return pattern.optimize()

    def _repeat_bounds(self, repeat):
        """Returns the (min, max) repetition counts of a Repeater node.

        '*' gives (0, pytree.HUGE), '+' gives (1, pytree.HUGE); a braced
        repeater gives (n, n) when it has three children and (n, m) when it
        has five.
        """
        assert repeat.type == self.syms.Repeater
        children = repeat.children
        first = children[0]
        if first.type == token.STAR:
            return 0, pytree.HUGE
        if first.type == token.PLUS:
            return 1, pytree.HUGE
        if first.type == token.LBRACE:
            assert children[-1].type == token.RBRACE
            assert len(children) in (3, 5)
            # Three children: '{' NUMBER '}'.  Five children: the second
            # NUMBER sits at index 3 (index 2 is presumably the separator).
            low = high = self.get_int(children[1])
            if len(children) == 5:
                high = self.get_int(children[3])
            return low, high
        # Raised explicitly (not via ``assert False``) so that the failure
        # is still reported when assertions are stripped with -O.
        raise AssertionError("unexpected repeater: %r" % (first,))

    def compile_basic(self, nodes, repeat=None):
        """Compiles the core of a unit: STRING | NAME [Details] | (...) | [...]

        *nodes* is the non-empty list of parse-tree nodes left after any
        name binding and repeater have been removed.  *repeat* is the
        Repeater node, if any; it is only used to check that a bracketed
        optional group is not also followed by a repeater.

        Raises SyntaxError for an upper-case NAME that is not a key of
        TOKEN_MAP, and for any other NAME (except "any") that is not a
        usable attribute of the Python symbols object.
        """
        assert len(nodes) >= 1
        node = nodes[0]

        if node.type == token.STRING:
            value = literals.evalString(node.value)
            return pytree.LeafPattern(content=value)

        if node.type == token.NAME:
            value = node.value
            if value.isupper():
                # Upper-case names refer to token types.
                if value not in TOKEN_MAP:
                    raise SyntaxError("Invalid token: %r" % value)
                return pytree.LeafPattern(TOKEN_MAP[value])

            if value == "any":
                node_type = None
            else:
                # Names starting with "_" are never looked up on the
                # symbols object (presumably so private/dunder attributes
                # cannot be mistaken for grammar symbols).  They are
                # reported as invalid symbols; previously this path left
                # the type unbound and failed with UnboundLocalError.
                node_type = None
                if not value.startswith("_"):
                    node_type = getattr(self.pysyms, value, None)
                if node_type is None:
                    raise SyntaxError("Invalid symbol: %r" % value)

            if nodes[1:]:  # Details present
                content = [self.compile_node(nodes[1].children[1])]
            else:
                content = None
            return pytree.NodePattern(node_type, content)

        if node.value == "(":
            return self.compile_node(nodes[1])

        if node.value == "[":
            assert repeat is None
            subpattern = self.compile_node(nodes[1])
            return pytree.WildcardPattern([[subpattern]], min=0, max=1)

        # Raised explicitly (not via ``assert False``) so that an unknown
        # node cannot fall through and return None under -O.
        raise AssertionError(node)

    def get_int(self, node):
        """Returns the integer value of a NUMBER token node."""
        assert node.type == token.NUMBER
        return int(node.value)
|
|
|
|
|
|
|
|
|
|
|
|
# Upper-case token names usable in patterns, mapped to the type value that
# is handed to LeafPattern.  "TOKEN" maps to None, which is passed through
# as-is (presumably meaning "any token type" -- confirm against
# pytree.LeafPattern).
TOKEN_MAP = {
    "NAME": token.NAME,
    "STRING": token.STRING,
    "NUMBER": token.NUMBER,
    "TOKEN": None,
}
|
|
|
|
|
|
|
|
|
|
|
|
def pattern_convert(grammar, raw_node_info):
    """Turn a raw (type, value, context, children) tuple into a tree node.

    A pytree.Node is built when the tuple has children or when its type is
    a key of ``grammar.number2symbol``; otherwise a pytree.Leaf is built
    from the type and value.  The context is passed along in both cases.
    """
    node_type, value, context, children = raw_node_info
    # Same short-circuit order as testing ``children`` first: the grammar
    # table is only consulted when there are no children.
    is_leaf = not children and node_type not in grammar.number2symbol
    if is_leaf:
        return pytree.Leaf(node_type, value, context=context)
    return pytree.Node(node_type, children, context=context)
|
|
|
|
|
|
|
|
|
|
|
|
def compile_pattern(pattern):
    """Compile *pattern* using a freshly built default PatternCompiler."""
    compiler = PatternCompiler()
    return compiler.compile_pattern(pattern)
|