These files are a subset of the python-2.7.2.tgz distribution from python.org. Changed files from PyMod-2.7.2 have been copied into the corresponding directories of this tree, replacing the original files in the distribution. Signed-off-by: daryl.mcdaniel@intel.com git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@13197 6f19259b-4bc3-4df7-8a09-765794883524
		
			
				
	
	
		
			185 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			185 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""
 | 
						|
 | 
						|
# Python imports
 | 
						|
import pickle
 | 
						|
 | 
						|
# Local imports
 | 
						|
from . import token, tokenize
 | 
						|
 | 
						|
 | 
						|
class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict, empty after __init__ and never filled in
                     by this class.  (Presumably maps symbol names to
                     arc labels; confirm against the subclasses that
                     populate the tables.)

    """

    def __init__(self):
        """Create a grammar whose tables are all empty."""
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        # Label 0 is pre-allocated as the (0, "EMPTY") entry.
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        The whole instance __dict__ is written with pickle protocol 2.
        The file is closed even if pickling raises.
        """
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file.

        The unpickled dict is merged into the instance __dict__, so
        attributes absent from the file keep their current values.
        The file is closed even if unpickling raises.
        """
        # NOTE(security): pickle.load can execute arbitrary code; only
        # load grammar pickles that come from a trusted source.
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """
        Copy the grammar.

        The copy is shallow: the returned instance gets new top-level
        dicts and lists, but the objects stored inside them are shared
        with the original.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        # Single-argument print(...) calls produce the same output as a
        # Python 2 print statement and as the Python 3 print function.
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start %s" % (self.start,))
 | 
						|
 | 
						|
 | 
						|
# Map from operator to number (since tokenize doesn't do this)

# One "<operator> <token-name>" pair per line.  Each non-empty line
# must hold exactly two whitespace-separated fields, because the loop
# below unpacks line.split() into two names.
opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

# opmap maps each operator string to the attribute of that name in the
# token module.
opmap = {}
for line in opmap_raw.splitlines():
    # The table starts with an empty line; skip it.
    if line:
        op, name = line.split()
        opmap[op] = getattr(token, name)
 |