These files are a subset of the python-2.7.2.tgz distribution from python.org. Changed files from PyMod-2.7.2 have been copied into the corresponding directories of this tree, replacing the original files in the distribution. Signed-off-by: daryl.mcdaniel@intel.com git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@13197 6f19259b-4bc3-4df7-8a09-765794883524
		
			
				
	
	
		
			169 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			169 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """A bottom-up tree matching algorithm implementation meant to speed
 | |
| up 2to3's matching process. After the tree patterns are reduced to
 | |
| their rarest linear path, a linear Aho-Corasick automaton is
 | |
| created. The linear automaton traverses the linear paths from the
 | |
| leaves to the root of the AST and returns a set of nodes for further
 | |
| matching. This reduces significantly the number of candidate nodes."""
 | |
| 
 | |
| __author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
 | |
| 
 | |
| import logging
 | |
| import itertools
 | |
| from collections import defaultdict
 | |
| 
 | |
| from . import pytree
 | |
| from .btm_utils import reduce_tree
 | |
| 
 | |
| class BMNode(object):
 | |
|     """Class for a node of the Aho-Corasick automaton used in matching"""
 | |
|     count = itertools.count()
 | |
|     def __init__(self):
 | |
|         self.transition_table = {}
 | |
|         self.fixers = []
 | |
|         self.id = next(BMNode.count)
 | |
|         self.content = ''
 | |
| 
 | |
| class BottomMatcher(object):
 | |
|     """The main matcher class. After instantiating the patterns should
 | |
|     be added using the add_fixer method"""
 | |
| 
 | |
|     def __init__(self):
 | |
|         self.match = set()
 | |
|         self.root = BMNode()
 | |
|         self.nodes = [self.root]
 | |
|         self.fixers = []
 | |
|         self.logger = logging.getLogger("RefactoringTool")
 | |
| 
 | |
|     def add_fixer(self, fixer):
 | |
|         """Reduces a fixer's pattern tree to a linear path and adds it
 | |
|         to the matcher(a common Aho-Corasick automaton). The fixer is
 | |
|         appended on the matching states and called when they are
 | |
|         reached"""
 | |
|         self.fixers.append(fixer)
 | |
|         tree = reduce_tree(fixer.pattern_tree)
 | |
|         linear = tree.get_linear_subpattern()
 | |
|         match_nodes = self.add(linear, start=self.root)
 | |
|         for match_node in match_nodes:
 | |
|             match_node.fixers.append(fixer)
 | |
| 
 | |
|     def add(self, pattern, start):
 | |
|         "Recursively adds a linear pattern to the AC automaton"
 | |
|         #print("adding pattern", pattern, "to", start)
 | |
|         if not pattern:
 | |
|             #print("empty pattern")
 | |
|             return [start]
 | |
|         if isinstance(pattern[0], tuple):
 | |
|             #alternatives
 | |
|             #print("alternatives")
 | |
|             match_nodes = []
 | |
|             for alternative in pattern[0]:
 | |
|                 #add all alternatives, and add the rest of the pattern
 | |
|                 #to each end node
 | |
|                 end_nodes = self.add(alternative, start=start)
 | |
|                 for end in end_nodes:
 | |
|                     match_nodes.extend(self.add(pattern[1:], end))
 | |
|             return match_nodes
 | |
|         else:
 | |
|             #single token
 | |
|             #not last
 | |
|             if pattern[0] not in start.transition_table:
 | |
|                 #transition did not exist, create new
 | |
|                 next_node = BMNode()
 | |
|                 start.transition_table[pattern[0]] = next_node
 | |
|             else:
 | |
|                 #transition exists already, follow
 | |
|                 next_node = start.transition_table[pattern[0]]
 | |
| 
 | |
|             if pattern[1:]:
 | |
|                 end_nodes = self.add(pattern[1:], start=next_node)
 | |
|             else:
 | |
|                 end_nodes = [next_node]
 | |
|             return end_nodes
 | |
| 
 | |
|     def run(self, leaves):
 | |
|         """The main interface with the bottom matcher. The tree is
 | |
|         traversed from the bottom using the constructed
 | |
|         automaton. Nodes are only checked once as the tree is
 | |
|         retraversed. When the automaton fails, we give it one more
 | |
|         shot(in case the above tree matches as a whole with the
 | |
|         rejected leaf), then we break for the next leaf. There is the
 | |
|         special case of multiple arguments(see code comments) where we
 | |
|         recheck the nodes
 | |
| 
 | |
|         Args:
 | |
|            The leaves of the AST tree to be matched
 | |
| 
 | |
|         Returns:
 | |
|            A dictionary of node matches with fixers as the keys
 | |
|         """
 | |
|         current_ac_node = self.root
 | |
|         results = defaultdict(list)
 | |
|         for leaf in leaves:
 | |
|             current_ast_node = leaf
 | |
|             while current_ast_node:
 | |
|                 current_ast_node.was_checked = True
 | |
|                 for child in current_ast_node.children:
 | |
|                     # multiple statements, recheck
 | |
|                     if isinstance(child, pytree.Leaf) and child.value == u";":
 | |
|                         current_ast_node.was_checked = False
 | |
|                         break
 | |
|                 if current_ast_node.type == 1:
 | |
|                     #name
 | |
|                     node_token = current_ast_node.value
 | |
|                 else:
 | |
|                     node_token = current_ast_node.type
 | |
| 
 | |
|                 if node_token in current_ac_node.transition_table:
 | |
|                     #token matches
 | |
|                     current_ac_node = current_ac_node.transition_table[node_token]
 | |
|                     for fixer in current_ac_node.fixers:
 | |
|                         if not fixer in results:
 | |
|                             results[fixer] = []
 | |
|                         results[fixer].append(current_ast_node)
 | |
| 
 | |
|                 else:
 | |
|                     #matching failed, reset automaton
 | |
|                     current_ac_node = self.root
 | |
|                     if (current_ast_node.parent is not None
 | |
|                         and current_ast_node.parent.was_checked):
 | |
|                         #the rest of the tree upwards has been checked, next leaf
 | |
|                         break
 | |
| 
 | |
|                     #recheck the rejected node once from the root
 | |
|                     if node_token in current_ac_node.transition_table:
 | |
|                         #token matches
 | |
|                         current_ac_node = current_ac_node.transition_table[node_token]
 | |
|                         for fixer in current_ac_node.fixers:
 | |
|                             if not fixer in results.keys():
 | |
|                                 results[fixer] = []
 | |
|                             results[fixer].append(current_ast_node)
 | |
| 
 | |
|                 current_ast_node = current_ast_node.parent
 | |
|         return results
 | |
| 
 | |
|     def print_ac(self):
 | |
|         "Prints a graphviz diagram of the BM automaton(for debugging)"
 | |
|         print("digraph g{")
 | |
|         def print_node(node):
 | |
|             for subnode_key in node.transition_table.keys():
 | |
|                 subnode = node.transition_table[subnode_key]
 | |
|                 print("%d -> %d [label=%s] //%s" %
 | |
|                       (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
 | |
|                 if subnode_key == 1:
 | |
|                     print(subnode.content)
 | |
|                 print_node(subnode)
 | |
|         print_node(self.root)
 | |
|         print("}")
 | |
| 
 | |
| # taken from pytree.py for debugging; only used by print_ac
 | |
| _type_reprs = {}
 | |
| def type_repr(type_num):
 | |
|     global _type_reprs
 | |
|     if not _type_reprs:
 | |
|         from .pygram import python_symbols
 | |
|         # printing tokens is possible but not as useful
 | |
|         # from .pgen2 import token // token.__dict__.items():
 | |
|         for name, val in python_symbols.__dict__.items():
 | |
|             if type(val) == int: _type_reprs[val] = name
 | |
|     return _type_reprs.setdefault(type_num, type_num)
 |