mirror of
https://github.com/python/cpython.git
synced 2025-01-25 02:24:25 +08:00
b0871cac11
svn+ssh://pythondev@svn.python.org/python/branches/py3k ................ r85510 | benjamin.peterson | 2010-10-14 18:00:04 -0500 (Thu, 14 Oct 2010) | 61 lines Merged revisions 83852-83853,83857,84042,84216,84274-84276,84375,85388,85478,85506-85508 via svnmerge from svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3 ........ r83852 | benjamin.peterson | 2010-08-08 15:45:44 -0500 (Sun, 08 Aug 2010) | 1 line wrap with parens ........ r83853 | benjamin.peterson | 2010-08-08 15:46:31 -0500 (Sun, 08 Aug 2010) | 1 line use parens ........ r83857 | benjamin.peterson | 2010-08-08 15:59:49 -0500 (Sun, 08 Aug 2010) | 1 line things which use touch_import should be pre order ........ r84042 | george.boutsioukis | 2010-08-14 16:10:19 -0500 (Sat, 14 Aug 2010) | 2 lines This revision incorporates into the 2to3 tool the new, faster, tree matching algorithm developed during a GSOC project. The algorithm resides in the two added modules, btm_matcher and btm_utils. New code has been added to drive the new matching process in refactor.py and a few minor changes were made in other modules. A BM_compatible flag(False by default) has been added in fixer_base and it is set to True in most of the current fixers. ........ r84216 | benjamin.peterson | 2010-08-19 16:44:05 -0500 (Thu, 19 Aug 2010) | 1 line allow star_expr in testlist_gexp ........ r84274 | benjamin.peterson | 2010-08-22 18:40:46 -0500 (Sun, 22 Aug 2010) | 1 line wrap long line ........ r84275 | benjamin.peterson | 2010-08-22 18:42:22 -0500 (Sun, 22 Aug 2010) | 1 line cleanup ........ r84276 | benjamin.peterson | 2010-08-22 18:51:01 -0500 (Sun, 22 Aug 2010) | 1 line when there's a None value and a traceback, don't call type with it #9661 ........ r84375 | george.boutsioukis | 2010-08-31 08:38:53 -0500 (Tue, 31 Aug 2010) | 3 lines Idiomatic code changes & stylistic issues fixed in the BottomMatcher module. Thanks to Benjamin Peterson for taking the time to review the code. ........ r85388 | benjamin.peterson | 2010-10-12 17:27:44 -0500 (Tue, 12 Oct 2010) | 1 line fix urllib fixer with multiple as imports on a line #10069 ........ r85478 | benjamin.peterson | 2010-10-14 08:09:56 -0500 (Thu, 14 Oct 2010) | 1 line stop abusing docstrings ........ r85506 | benjamin.peterson | 2010-10-14 17:45:19 -0500 (Thu, 14 Oct 2010) | 1 line kill sibling import ........ r85507 | benjamin.peterson | 2010-10-14 17:54:15 -0500 (Thu, 14 Oct 2010) | 1 line remove trailing whitespace ........ r85508 | benjamin.peterson | 2010-10-14 17:55:28 -0500 (Thu, 14 Oct 2010) | 1 line typo ........ ................
169 lines
6.7 KiB
Python
169 lines
6.7 KiB
Python
"""A bottom-up tree matching algorithm implementation meant to speed
|
|
up 2to3's matching process. After the tree patterns are reduced to
|
|
their rarest linear path, a linear Aho-Corasick automaton is
|
|
created. The linear automaton traverses the linear paths from the
|
|
leaves to the root of the AST and returns a set of nodes for further
|
|
matching. This reduces significantly the number of candidate nodes."""
|
|
|
|
__author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
|
|
|
|
import logging
|
|
import itertools
|
|
from collections import defaultdict
|
|
|
|
from . import pytree
|
|
from .btm_utils import reduce_tree
|
|
|
|
class BMNode(object):
|
|
"""Class for a node of the Aho-Corasick automaton used in matching"""
|
|
count = itertools.count()
|
|
def __init__(self):
|
|
self.transition_table = {}
|
|
self.fixers = []
|
|
self.id = next(BMNode.count)
|
|
self.content = ''
|
|
|
|
class BottomMatcher(object):
|
|
"""The main matcher class. After instantiating the patterns should
|
|
be added using the add_fixer method"""
|
|
|
|
def __init__(self):
|
|
self.match = set()
|
|
self.root = BMNode()
|
|
self.nodes = [self.root]
|
|
self.fixers = []
|
|
self.logger = logging.getLogger("RefactoringTool")
|
|
|
|
def add_fixer(self, fixer):
|
|
"""Reduces a fixer's pattern tree to a linear path and adds it
|
|
to the matcher(a common Aho-Corasick automaton). The fixer is
|
|
appended on the matching states and called when they are
|
|
reached"""
|
|
self.fixers.append(fixer)
|
|
tree = reduce_tree(fixer.pattern_tree)
|
|
linear = tree.get_linear_subpattern()
|
|
match_nodes = self.add(linear, start=self.root)
|
|
for match_node in match_nodes:
|
|
match_node.fixers.append(fixer)
|
|
|
|
def add(self, pattern, start):
|
|
"Recursively adds a linear pattern to the AC automaton"
|
|
#print("adding pattern", pattern, "to", start)
|
|
if not pattern:
|
|
#print("empty pattern")
|
|
return [start]
|
|
if isinstance(pattern[0], tuple):
|
|
#alternatives
|
|
#print("alternatives")
|
|
match_nodes = []
|
|
for alternative in pattern[0]:
|
|
#add all alternatives, and add the rest of the pattern
|
|
#to each end node
|
|
end_nodes = self.add(alternative, start=start)
|
|
for end in end_nodes:
|
|
match_nodes.extend(self.add(pattern[1:], end))
|
|
return match_nodes
|
|
else:
|
|
#single token
|
|
#not last
|
|
if pattern[0] not in start.transition_table:
|
|
#transition did not exist, create new
|
|
next_node = BMNode()
|
|
start.transition_table[pattern[0]] = next_node
|
|
else:
|
|
#transition exists already, follow
|
|
next_node = start.transition_table[pattern[0]]
|
|
|
|
if pattern[1:]:
|
|
end_nodes = self.add(pattern[1:], start=next_node)
|
|
else:
|
|
end_nodes = [next_node]
|
|
return end_nodes
|
|
|
|
def run(self, leaves):
|
|
"""The main interface with the bottom matcher. The tree is
|
|
traversed from the bottom using the constructed
|
|
automaton. Nodes are only checked once as the tree is
|
|
retraversed. When the automaton fails, we give it one more
|
|
shot(in case the above tree matches as a whole with the
|
|
rejected leaf), then we break for the next leaf. There is the
|
|
special case of multiple arguments(see code comments) where we
|
|
recheck the nodes
|
|
|
|
Args:
|
|
The leaves of the AST tree to be matched
|
|
|
|
Returns:
|
|
A dictionary of node matches with fixers as the keys
|
|
"""
|
|
current_ac_node = self.root
|
|
results = defaultdict(list)
|
|
for leaf in leaves:
|
|
current_ast_node = leaf
|
|
while current_ast_node:
|
|
current_ast_node.was_checked = True
|
|
for child in current_ast_node.children:
|
|
# multiple statements, recheck
|
|
if isinstance(child, pytree.Leaf) and child.value == ";":
|
|
current_ast_node.was_checked = False
|
|
break
|
|
if current_ast_node.type == 1:
|
|
#name
|
|
node_token = current_ast_node.value
|
|
else:
|
|
node_token = current_ast_node.type
|
|
|
|
if node_token in current_ac_node.transition_table:
|
|
#token matches
|
|
current_ac_node = current_ac_node.transition_table[node_token]
|
|
for fixer in current_ac_node.fixers:
|
|
if not fixer in results:
|
|
results[fixer] = []
|
|
results[fixer].append(current_ast_node)
|
|
|
|
else:
|
|
#matching failed, reset automaton
|
|
current_ac_node = self.root
|
|
if (current_ast_node.parent is not None
|
|
and current_ast_node.parent.was_checked):
|
|
#the rest of the tree upwards has been checked, next leaf
|
|
break
|
|
|
|
#recheck the rejected node once from the root
|
|
if node_token in current_ac_node.transition_table:
|
|
#token matches
|
|
current_ac_node = current_ac_node.transition_table[node_token]
|
|
for fixer in current_ac_node.fixers:
|
|
if not fixer in results.keys():
|
|
results[fixer] = []
|
|
results[fixer].append(current_ast_node)
|
|
|
|
current_ast_node = current_ast_node.parent
|
|
return results
|
|
|
|
def print_ac(self):
|
|
"Prints a graphviz diagram of the BM automaton(for debugging)"
|
|
print("digraph g{")
|
|
def print_node(node):
|
|
for subnode_key in node.transition_table.keys():
|
|
subnode = node.transition_table[subnode_key]
|
|
print("%d -> %d [label=%s] //%s" %
|
|
(node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
|
|
if subnode_key == 1:
|
|
print(subnode.content)
|
|
print_node(subnode)
|
|
print_node(self.root)
|
|
print("}")
|
|
|
|
# taken from pytree.py for debugging; only used by print_ac
|
|
_type_reprs = {}
|
|
def type_repr(type_num):
|
|
global _type_reprs
|
|
if not _type_reprs:
|
|
from .pygram import python_symbols
|
|
# printing tokens is possible but not as useful
|
|
# from .pgen2 import token // token.__dict__.items():
|
|
for name, val in python_symbols.__dict__.items():
|
|
if type(val) == int: _type_reprs[val] = name
|
|
return _type_reprs.setdefault(type_num, type_num)
|