AppPkg/Applications/Python: Add Python 2.7.2 sources since the release of Python 2.7.3 made them unavailable from the python.org web site.

These files are a subset of the python-2.7.2.tgz distribution from python.org.  Changed files from PyMod-2.7.2 have been copied into the corresponding directories of this tree, replacing the original files in the distribution.

Signed-off-by: daryl.mcdaniel@intel.com


git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@13197 6f19259b-4bc3-4df7-8a09-765794883524
Author: darylm503
Date:   2012-04-16 22:12:42 +00:00
Parent: cbc6b5e545
Commit: 4710c53dca
2106 changed files with 871583 additions and 0 deletions


@@ -0,0 +1,115 @@
-- ASDL's five builtin types are identifier, int, string, object, bool
module Python version "$Revision$"
{
mod = Module(stmt* body)
| Interactive(stmt* body)
| Expression(expr body)
-- not really an actual node but useful in Jython's typesystem.
| Suite(stmt* body)
stmt = FunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list)
| ClassDef(identifier name, expr* bases, stmt* body, expr* decorator_list)
| Return(expr? value)
| Delete(expr* targets)
| Assign(expr* targets, expr value)
| AugAssign(expr target, operator op, expr value)
-- not sure if bool is allowed, can always use int
| Print(expr? dest, expr* values, bool nl)
-- use 'orelse' because else is a keyword in target languages
| For(expr target, expr iter, stmt* body, stmt* orelse)
| While(expr test, stmt* body, stmt* orelse)
| If(expr test, stmt* body, stmt* orelse)
| With(expr context_expr, expr? optional_vars, stmt* body)
-- 'type' is a bad name
| Raise(expr? type, expr? inst, expr? tback)
| TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
| TryFinally(stmt* body, stmt* finalbody)
| Assert(expr test, expr? msg)
| Import(alias* names)
| ImportFrom(identifier? module, alias* names, int? level)
-- Doesn't capture requirement that locals must be
-- defined if globals is
-- still supports use as a function!
| Exec(expr body, expr? globals, expr? locals)
| Global(identifier* names)
| Expr(expr value)
| Pass | Break | Continue
-- XXX Jython will be different
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset)
-- BoolOp() can use left & right?
expr = BoolOp(boolop op, expr* values)
| BinOp(expr left, operator op, expr right)
| UnaryOp(unaryop op, expr operand)
| Lambda(arguments args, expr body)
| IfExp(expr test, expr body, expr orelse)
| Dict(expr* keys, expr* values)
| Set(expr* elts)
| ListComp(expr elt, comprehension* generators)
| SetComp(expr elt, comprehension* generators)
| DictComp(expr key, expr value, comprehension* generators)
| GeneratorExp(expr elt, comprehension* generators)
-- the grammar constrains where yield expressions can occur
| Yield(expr? value)
-- need sequences for compare to distinguish between
-- x < 4 < 3 and (x < 4) < 3
| Compare(expr left, cmpop* ops, expr* comparators)
| Call(expr func, expr* args, keyword* keywords,
expr? starargs, expr? kwargs)
| Repr(expr value)
| Num(object n) -- a number as a PyObject.
| Str(string s) -- need to specify raw, unicode, etc?
-- other literals? bools?
-- the following expression can appear in assignment context
| Attribute(expr value, identifier attr, expr_context ctx)
| Subscript(expr value, slice slice, expr_context ctx)
| Name(identifier id, expr_context ctx)
| List(expr* elts, expr_context ctx)
| Tuple(expr* elts, expr_context ctx)
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset)
expr_context = Load | Store | Del | AugLoad | AugStore | Param
slice = Ellipsis | Slice(expr? lower, expr? upper, expr? step)
| ExtSlice(slice* dims)
| Index(expr value)
boolop = And | Or
operator = Add | Sub | Mult | Div | Mod | Pow | LShift
| RShift | BitOr | BitXor | BitAnd | FloorDiv
unaryop = Invert | Not | UAdd | USub
cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
comprehension = (expr target, expr iter, expr* ifs)
-- not sure what to call the first argument for raise and except
excepthandler = ExceptHandler(expr? type, expr? name, stmt* body)
attributes (int lineno, int col_offset)
arguments = (expr* args, identifier? vararg,
identifier? kwarg, expr* defaults)
-- keyword arguments supplied to call
keyword = (identifier arg, expr value)
-- import name with optional 'as' alias.
alias = (identifier name, identifier? asname)
}


@@ -0,0 +1,125 @@
/* Parser accelerator module */
/* The parser as originally conceived had disappointing performance.
This module does some precomputation that speeds up the selection
of a DFA based upon a token, turning a search through an array
into a simple indexing operation. The parser now cannot work
without the accelerators installed. Note that the accelerators
are installed dynamically when the parser is initialized, they
are not part of the static data structure written on graminit.[ch]
by the parser generator. */
#include "pgenheaders.h"
#include "grammar.h"
#include "node.h"
#include "token.h"
#include "parser.h"
/* Forward references */
static void fixdfa(grammar *, dfa *);
static void fixstate(grammar *, state *);
void
PyGrammar_AddAccelerators(grammar *g)
{
dfa *d;
int i;
d = g->g_dfa;
for (i = g->g_ndfas; --i >= 0; d++)
fixdfa(g, d);
g->g_accel = 1;
}
void
PyGrammar_RemoveAccelerators(grammar *g)
{
dfa *d;
int i;
g->g_accel = 0;
d = g->g_dfa;
for (i = g->g_ndfas; --i >= 0; d++) {
state *s;
int j;
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++) {
if (s->s_accel)
PyObject_FREE(s->s_accel);
s->s_accel = NULL;
}
}
}
static void
fixdfa(grammar *g, dfa *d)
{
state *s;
int j;
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++)
fixstate(g, s);
}
static void
fixstate(grammar *g, state *s)
{
arc *a;
int k;
int *accel;
int nl = g->g_ll.ll_nlabels;
s->s_accept = 0;
accel = (int *) PyObject_MALLOC(nl * sizeof(int));
if (accel == NULL) {
fprintf(stderr, "no mem to build parser accelerators\n");
exit(1);
}
for (k = 0; k < nl; k++)
accel[k] = -1;
a = s->s_arc;
for (k = s->s_narcs; --k >= 0; a++) {
int lbl = a->a_lbl;
label *l = &g->g_ll.ll_label[lbl];
int type = l->lb_type;
if (a->a_arrow >= (1 << 7)) {
printf("XXX too many states!\n");
continue;
}
if (ISNONTERMINAL(type)) {
dfa *d1 = PyGrammar_FindDFA(g, type);
int ibit;
if (type - NT_OFFSET >= (1 << 7)) {
printf("XXX too high nonterminal number!\n");
continue;
}
for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) {
if (testbit(d1->d_first, ibit)) {
if (accel[ibit] != -1)
printf("XXX ambiguity!\n");
accel[ibit] = a->a_arrow | (1 << 7) |
((type - NT_OFFSET) << 8);
}
}
}
else if (lbl == EMPTY)
s->s_accept = 1;
else if (lbl >= 0 && lbl < nl)
accel[lbl] = a->a_arrow;
}
while (nl > 0 && accel[nl-1] == -1)
nl--;
for (k = 0; k < nl && accel[k] == -1;)
k++;
if (k < nl) {
int i;
s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int));
if (s->s_accel == NULL) {
fprintf(stderr, "no mem to add parser accelerators\n");
exit(1);
}
s->s_lower = k;
s->s_upper = nl;
for (i = 0; k < nl; i++, k++)
s->s_accel[i] = accel[k];
}
PyObject_FREE(accel);
}
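
The accelerator entries built above are packed integers: the next state ("arrow") in the low seven bits, a push flag in bit 7, and the nonterminal number (biased by NT_OFFSET) in bits 8 and up. For illustration only, a hypothetical decoder that mirrors the layout fixstate() writes and PyParser_AddToken() later reads; neither the struct nor the helper is part of acceler.c.

/* Hypothetical decoder for one accelerator entry (illustration only). */
#include "pgenheaders.h"
#include "token.h"              /* NT_OFFSET */

typedef struct {
    int push;                   /* nonzero: push the DFA for 'nt' */
    int nt;                     /* nonterminal type when push is set */
    int arrow;                  /* next state in the current DFA */
} accel_decoded;

static accel_decoded
decode_accel(int x)
{
    accel_decoded r;
    r.push  = (x & (1 << 7)) != 0;
    r.nt    = (x >> 8) + NT_OFFSET;     /* matches fixstate() above */
    r.arrow = x & ((1 << 7) - 1);
    return r;
}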


@@ -0,0 +1,413 @@
"""An implementation of the Zephyr Abstract Syntax Definition Language.
See http://asdl.sourceforge.net/ and
http://www.cs.princeton.edu/research/techreps/TR-554-97
Only supports top level module decl, not view. I'm guessing that view
is intended to support the browser and I'm not interested in the
browser.
Changes for Python: Add support for module versions
"""
import os
import traceback
import spark
class Token(object):
# spark seems to dispatch in the parser based on a token's
# type attribute
def __init__(self, type, lineno):
self.type = type
self.lineno = lineno
def __str__(self):
return self.type
def __repr__(self):
return str(self)
class Id(Token):
def __init__(self, value, lineno):
self.type = 'Id'
self.value = value
self.lineno = lineno
def __str__(self):
return self.value
class String(Token):
def __init__(self, value, lineno):
self.type = 'String'
self.value = value
self.lineno = lineno
class ASDLSyntaxError(Exception):
def __init__(self, lineno, token=None, msg=None):
self.lineno = lineno
self.token = token
self.msg = msg
def __str__(self):
if self.msg is None:
return "Error at '%s', line %d" % (self.token, self.lineno)
else:
return "%s, line %d" % (self.msg, self.lineno)
class ASDLScanner(spark.GenericScanner, object):
def tokenize(self, input):
self.rv = []
self.lineno = 1
super(ASDLScanner, self).tokenize(input)
return self.rv
def t_id(self, s):
r"[\w\.]+"
# XXX doesn't distinguish upper vs. lower, which is
# significant for ASDL.
self.rv.append(Id(s, self.lineno))
def t_string(self, s):
r'"[^"]*"'
self.rv.append(String(s, self.lineno))
def t_xxx(self, s): # not sure what this production means
r"<="
self.rv.append(Token(s, self.lineno))
def t_punctuation(self, s):
r"[\{\}\*\=\|\(\)\,\?\:]"
self.rv.append(Token(s, self.lineno))
def t_comment(self, s):
r"\-\-[^\n]*"
pass
def t_newline(self, s):
r"\n"
self.lineno += 1
def t_whitespace(self, s):
r"[ \t]+"
pass
def t_default(self, s):
r" . +"
raise ValueError, "unmatched input: %s" % `s`
class ASDLParser(spark.GenericParser, object):
def __init__(self):
super(ASDLParser, self).__init__("module")
def typestring(self, tok):
return tok.type
def error(self, tok):
raise ASDLSyntaxError(tok.lineno, tok)
def p_module_0(self, (module, name, version, _0, _1)):
" module ::= Id Id version { } "
if module.value != "module":
raise ASDLSyntaxError(module.lineno,
msg="expected 'module', found %s" % module)
return Module(name, None, version)
def p_module(self, (module, name, version, _0, definitions, _1)):
" module ::= Id Id version { definitions } "
if module.value != "module":
raise ASDLSyntaxError(module.lineno,
msg="expected 'module', found %s" % module)
return Module(name, definitions, version)
def p_version(self, (version, V)):
"version ::= Id String"
if version.value != "version":
raise ASDLSyntaxError(version.lineno,
msg="expected 'version', found %" % version)
return V
def p_definition_0(self, (definition,)):
" definitions ::= definition "
return definition
def p_definition_1(self, (definitions, definition)):
" definitions ::= definition definitions "
return definitions + definition
def p_definition(self, (id, _, type)):
" definition ::= Id = type "
return [Type(id, type)]
def p_type_0(self, (product,)):
" type ::= product "
return product
def p_type_1(self, (sum,)):
" type ::= sum "
return Sum(sum)
def p_type_2(self, (sum, id, _0, attributes, _1)):
" type ::= sum Id ( fields ) "
if id.value != "attributes":
raise ASDLSyntaxError(id.lineno,
msg="expected attributes, found %s" % id)
if attributes:
attributes.reverse()
return Sum(sum, attributes)
def p_product(self, (_0, fields, _1)):
" product ::= ( fields ) "
# XXX can't I just construct things in the right order?
fields.reverse()
return Product(fields)
def p_sum_0(self, (constructor,)):
" sum ::= constructor "
return [constructor]
def p_sum_1(self, (constructor, _, sum)):
" sum ::= constructor | sum "
return [constructor] + sum
def p_sum_2(self, (constructor, _, sum)):
" sum ::= constructor | sum "
return [constructor] + sum
def p_constructor_0(self, (id,)):
" constructor ::= Id "
return Constructor(id)
def p_constructor_1(self, (id, _0, fields, _1)):
" constructor ::= Id ( fields ) "
# XXX can't I just construct things in the right order?
fields.reverse()
return Constructor(id, fields)
def p_fields_0(self, (field,)):
" fields ::= field "
return [field]
def p_fields_1(self, (field, _, fields)):
" fields ::= field , fields "
return fields + [field]
def p_field_0(self, (type,)):
" field ::= Id "
return Field(type)
def p_field_1(self, (type, name)):
" field ::= Id Id "
return Field(type, name)
def p_field_2(self, (type, _, name)):
" field ::= Id * Id "
return Field(type, name, seq=True)
def p_field_3(self, (type, _, name)):
" field ::= Id ? Id "
return Field(type, name, opt=True)
def p_field_4(self, (type, _)):
" field ::= Id * "
return Field(type, seq=True)
def p_field_5(self, (type, _)):
" field ::= Id ? "
return Field(type, opt=True)
builtin_types = ("identifier", "string", "int", "bool", "object")
# below is a collection of classes to capture the AST of an AST :-)
# not sure if any of the methods are useful yet, but I'm adding them
# piecemeal as they seem helpful
class AST(object):
pass # a marker class
class Module(AST):
def __init__(self, name, dfns, version):
self.name = name
self.dfns = dfns
self.version = version
self.types = {} # maps type name to value (from dfns)
for type in dfns:
self.types[type.name.value] = type.value
def __repr__(self):
return "Module(%s, %s)" % (self.name, self.dfns)
class Type(AST):
def __init__(self, name, value):
self.name = name
self.value = value
def __repr__(self):
return "Type(%s, %s)" % (self.name, self.value)
class Constructor(AST):
def __init__(self, name, fields=None):
self.name = name
self.fields = fields or []
def __repr__(self):
return "Constructor(%s, %s)" % (self.name, self.fields)
class Field(AST):
def __init__(self, type, name=None, seq=False, opt=False):
self.type = type
self.name = name
self.seq = seq
self.opt = opt
def __repr__(self):
if self.seq:
extra = ", seq=True"
elif self.opt:
extra = ", opt=True"
else:
extra = ""
if self.name is None:
return "Field(%s%s)" % (self.type, extra)
else:
return "Field(%s, %s%s)" % (self.type, self.name, extra)
class Sum(AST):
def __init__(self, types, attributes=None):
self.types = types
self.attributes = attributes or []
def __repr__(self):
if self.attributes is None:
return "Sum(%s)" % self.types
else:
return "Sum(%s, %s)" % (self.types, self.attributes)
class Product(AST):
def __init__(self, fields):
self.fields = fields
def __repr__(self):
return "Product(%s)" % self.fields
class VisitorBase(object):
def __init__(self, skip=False):
self.cache = {}
self.skip = skip
def visit(self, object, *args):
meth = self._dispatch(object)
if meth is None:
return
try:
meth(object, *args)
except Exception, err:
print "Error visiting", repr(object)
print err
traceback.print_exc()
# XXX hack
if hasattr(self, 'file'):
self.file.flush()
os._exit(1)
def _dispatch(self, object):
assert isinstance(object, AST), repr(object)
klass = object.__class__
meth = self.cache.get(klass)
if meth is None:
methname = "visit" + klass.__name__
if self.skip:
meth = getattr(self, methname, None)
else:
meth = getattr(self, methname)
self.cache[klass] = meth
return meth
class Check(VisitorBase):
def __init__(self):
super(Check, self).__init__(skip=True)
self.cons = {}
self.errors = 0
self.types = {}
def visitModule(self, mod):
for dfn in mod.dfns:
self.visit(dfn)
def visitType(self, type):
self.visit(type.value, str(type.name))
def visitSum(self, sum, name):
for t in sum.types:
self.visit(t, name)
def visitConstructor(self, cons, name):
key = str(cons.name)
conflict = self.cons.get(key)
if conflict is None:
self.cons[key] = name
else:
print "Redefinition of constructor %s" % key
print "Defined in %s and %s" % (conflict, name)
self.errors += 1
for f in cons.fields:
self.visit(f, key)
def visitField(self, field, name):
key = str(field.type)
l = self.types.setdefault(key, [])
l.append(name)
def visitProduct(self, prod, name):
for f in prod.fields:
self.visit(f, name)
def check(mod):
v = Check()
v.visit(mod)
for t in v.types:
if t not in mod.types and not t in builtin_types:
v.errors += 1
uses = ", ".join(v.types[t])
print "Undefined type %s, used in %s" % (t, uses)
return not v.errors
def parse(file):
scanner = ASDLScanner()
parser = ASDLParser()
buf = open(file).read()
tokens = scanner.tokenize(buf)
try:
return parser.parse(tokens)
except ASDLSyntaxError, err:
print err
lines = buf.split("\n")
print lines[err.lineno - 1] # lines starts at 0, files at 1
if __name__ == "__main__":
import glob
import sys
if len(sys.argv) > 1:
files = sys.argv[1:]
else:
testdir = "tests"
files = glob.glob(testdir + "/*.asdl")
for file in files:
print file
mod = parse(file)
print "module", mod.name
print len(mod.dfns), "definitions"
if not check(mod):
print "Check failed"
else:
for dfn in mod.dfns:
print dfn.type

File diff suppressed because it is too large.


@@ -0,0 +1,66 @@
/* Bitset primitives used by the parser generator */
#include "pgenheaders.h"
#include "bitset.h"
bitset
newbitset(int nbits)
{
int nbytes = NBYTES(nbits);
bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes);
if (ss == NULL)
Py_FatalError("no mem for bitset");
ss += nbytes;
while (--nbytes >= 0)
*--ss = 0;
return ss;
}
void
delbitset(bitset ss)
{
PyObject_FREE(ss);
}
int
addbit(bitset ss, int ibit)
{
int ibyte = BIT2BYTE(ibit);
BYTE mask = BIT2MASK(ibit);
if (ss[ibyte] & mask)
return 0; /* Bit already set */
ss[ibyte] |= mask;
return 1;
}
#if 0 /* Now a macro */
int
testbit(bitset ss, int ibit)
{
return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0;
}
#endif
int
samebitset(bitset ss1, bitset ss2, int nbits)
{
int i;
for (i = NBYTES(nbits); --i >= 0; )
if (*ss1++ != *ss2++)
return 0;
return 1;
}
void
mergebitset(bitset ss1, bitset ss2, int nbits)
{
int i;
for (i = NBYTES(nbits); --i >= 0; )
*ss1++ |= *ss2++;
}
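
A short usage sketch of these primitives, assuming the testbit() macro from bitset.h noted above; the function below is illustrative and not part of bitset.c.

/* Illustrative use of the bitset primitives (not part of bitset.c). */
#include "pgenheaders.h"
#include "bitset.h"

static void
bitset_demo(void)
{
    bitset a = newbitset(16);
    bitset b = newbitset(16);
    addbit(a, 3);               /* returns 1: bit newly set */
    addbit(a, 3);               /* returns 0: bit already set */
    addbit(b, 5);
    mergebitset(a, b, 16);      /* a |= b, so a now has bits 3 and 5 */
    if (testbit(a, 5) && !samebitset(a, b, 16)) {
        /* expected: a contains bit 5 but differs from b */
    }
    delbitset(b);
    delbitset(a);
}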


@@ -0,0 +1,113 @@
/* Computation of FIRST sets */
#include "pgenheaders.h"
#include "grammar.h"
#include "token.h"
extern int Py_DebugFlag;
/* Forward */
static void calcfirstset(grammar *, dfa *);
void
addfirstsets(grammar *g)
{
int i;
dfa *d;
if (Py_DebugFlag)
printf("Adding FIRST sets ...\n");
for (i = 0; i < g->g_ndfas; i++) {
d = &g->g_dfa[i];
if (d->d_first == NULL)
calcfirstset(g, d);
}
}
static void
calcfirstset(grammar *g, dfa *d)
{
int i, j;
state *s;
arc *a;
int nsyms;
int *sym;
int nbits;
static bitset dummy;
bitset result;
int type;
dfa *d1;
label *l0;
if (Py_DebugFlag)
printf("Calculate FIRST set for '%s'\n", d->d_name);
if (dummy == NULL)
dummy = newbitset(1);
if (d->d_first == dummy) {
fprintf(stderr, "Left-recursion for '%s'\n", d->d_name);
return;
}
if (d->d_first != NULL) {
fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n",
d->d_name);
}
d->d_first = dummy;
l0 = g->g_ll.ll_label;
nbits = g->g_ll.ll_nlabels;
result = newbitset(nbits);
sym = (int *)PyObject_MALLOC(sizeof(int));
if (sym == NULL)
Py_FatalError("no mem for new sym in calcfirstset");
nsyms = 1;
sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL);
s = &d->d_state[d->d_initial];
for (i = 0; i < s->s_narcs; i++) {
a = &s->s_arc[i];
for (j = 0; j < nsyms; j++) {
if (sym[j] == a->a_lbl)
break;
}
if (j >= nsyms) { /* New label */
sym = (int *)PyObject_REALLOC(sym,
sizeof(int) * (nsyms + 1));
if (sym == NULL)
Py_FatalError(
"no mem to resize sym in calcfirstset");
sym[nsyms++] = a->a_lbl;
type = l0[a->a_lbl].lb_type;
if (ISNONTERMINAL(type)) {
d1 = PyGrammar_FindDFA(g, type);
if (d1->d_first == dummy) {
fprintf(stderr,
"Left-recursion below '%s'\n",
d->d_name);
}
else {
if (d1->d_first == NULL)
calcfirstset(g, d1);
mergebitset(result,
d1->d_first, nbits);
}
}
else if (ISTERMINAL(type)) {
addbit(result, a->a_lbl);
}
}
}
d->d_first = result;
if (Py_DebugFlag) {
printf("FIRST set for '%s': {", d->d_name);
for (i = 0; i < nbits; i++) {
if (testbit(result, i))
printf(" %s", PyGrammar_LabelRepr(&l0[i]));
}
printf(" }\n");
}
PyObject_FREE(sym);
}
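
Once addfirstsets() has run, deciding whether a token can begin a given nonterminal is a single bit test against d_first. The helper below (label_can_start is a hypothetical name, not in the sources) sketches that lookup.

/* Hypothetical helper, for illustration: after addfirstsets(g),
   d_first is a bitset over the grammar's labels. */
#include "pgenheaders.h"
#include "grammar.h"
#include "bitset.h"

static int
label_can_start(grammar *g, int nonterminal_type, int ilabel)
{
    dfa *d = PyGrammar_FindDFA(g, nonterminal_type);
    if (d->d_first == NULL)
        addfirstsets(g);        /* fills d_first for every DFA */
    return testbit(d->d_first, ilabel);
}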


@@ -0,0 +1,254 @@
/* Grammar implementation */
#include "Python.h"
#include "pgenheaders.h"
#include <ctype.h>
#include "token.h"
#include "grammar.h"
#ifdef RISCOS
#include <unixlib.h>
#endif
extern int Py_DebugFlag;
grammar *
newgrammar(int start)
{
grammar *g;
g = (grammar *)PyObject_MALLOC(sizeof(grammar));
if (g == NULL)
Py_FatalError("no mem for new grammar");
g->g_ndfas = 0;
g->g_dfa = NULL;
g->g_start = start;
g->g_ll.ll_nlabels = 0;
g->g_ll.ll_label = NULL;
g->g_accel = 0;
return g;
}
dfa *
adddfa(grammar *g, int type, char *name)
{
dfa *d;
g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa,
sizeof(dfa) * (g->g_ndfas + 1));
if (g->g_dfa == NULL)
Py_FatalError("no mem to resize dfa in adddfa");
d = &g->g_dfa[g->g_ndfas++];
d->d_type = type;
d->d_name = strdup(name);
d->d_nstates = 0;
d->d_state = NULL;
d->d_initial = -1;
d->d_first = NULL;
return d; /* Only use while fresh! */
}
int
addstate(dfa *d)
{
state *s;
d->d_state = (state *)PyObject_REALLOC(d->d_state,
sizeof(state) * (d->d_nstates + 1));
if (d->d_state == NULL)
Py_FatalError("no mem to resize state in addstate");
s = &d->d_state[d->d_nstates++];
s->s_narcs = 0;
s->s_arc = NULL;
s->s_lower = 0;
s->s_upper = 0;
s->s_accel = NULL;
s->s_accept = 0;
return s - d->d_state;
}
void
addarc(dfa *d, int from, int to, int lbl)
{
state *s;
arc *a;
assert(0 <= from && from < d->d_nstates);
assert(0 <= to && to < d->d_nstates);
s = &d->d_state[from];
s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1));
if (s->s_arc == NULL)
Py_FatalError("no mem to resize arc list in addarc");
a = &s->s_arc[s->s_narcs++];
a->a_lbl = lbl;
a->a_arrow = to;
}
int
addlabel(labellist *ll, int type, char *str)
{
int i;
label *lb;
for (i = 0; i < ll->ll_nlabels; i++) {
if (ll->ll_label[i].lb_type == type &&
strcmp(ll->ll_label[i].lb_str, str) == 0)
return i;
}
ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label,
sizeof(label) * (ll->ll_nlabels + 1));
if (ll->ll_label == NULL)
Py_FatalError("no mem to resize labellist in addlabel");
lb = &ll->ll_label[ll->ll_nlabels++];
lb->lb_type = type;
lb->lb_str = strdup(str);
if (Py_DebugFlag)
printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels,
PyGrammar_LabelRepr(lb));
return lb - ll->ll_label;
}
/* Same, but rather dies than adds */
int
findlabel(labellist *ll, int type, char *str)
{
int i;
for (i = 0; i < ll->ll_nlabels; i++) {
if (ll->ll_label[i].lb_type == type /*&&
strcmp(ll->ll_label[i].lb_str, str) == 0*/)
return i;
}
fprintf(stderr, "Label %d/'%s' not found\n", type, str);
Py_FatalError("grammar.c:findlabel()");
return 0; /* Make gcc -Wall happy */
}
/* Forward */
static void translabel(grammar *, label *);
void
translatelabels(grammar *g)
{
int i;
#ifdef Py_DEBUG
printf("Translating labels ...\n");
#endif
/* Don't translate EMPTY */
for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++)
translabel(g, &g->g_ll.ll_label[i]);
}
static void
translabel(grammar *g, label *lb)
{
int i;
if (Py_DebugFlag)
printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb));
if (lb->lb_type == NAME) {
for (i = 0; i < g->g_ndfas; i++) {
if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
if (Py_DebugFlag)
printf(
"Label %s is non-terminal %d.\n",
lb->lb_str,
g->g_dfa[i].d_type);
lb->lb_type = g->g_dfa[i].d_type;
free(lb->lb_str);
lb->lb_str = NULL;
return;
}
}
for (i = 0; i < (int)N_TOKENS; i++) {
if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) {
if (Py_DebugFlag)
printf("Label %s is terminal %d.\n",
lb->lb_str, i);
lb->lb_type = i;
free(lb->lb_str);
lb->lb_str = NULL;
return;
}
}
printf("Can't translate NAME label '%s'\n", lb->lb_str);
return;
}
if (lb->lb_type == STRING) {
if (isalpha(Py_CHARMASK(lb->lb_str[1])) ||
lb->lb_str[1] == '_') {
char *p;
char *src;
char *dest;
size_t name_len;
if (Py_DebugFlag)
printf("Label %s is a keyword\n", lb->lb_str);
lb->lb_type = NAME;
src = lb->lb_str + 1;
p = strchr(src, '\'');
if (p)
name_len = p - src;
else
name_len = strlen(src);
dest = (char *)malloc(name_len + 1);
if (!dest) {
printf("Can't alloc dest '%s'\n", src);
return;
}
strncpy(dest, src, name_len);
dest[name_len] = '\0';
free(lb->lb_str);
lb->lb_str = dest;
}
else if (lb->lb_str[2] == lb->lb_str[0]) {
int type = (int) PyToken_OneChar(lb->lb_str[1]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) {
int type = (int) PyToken_TwoChars(lb->lb_str[1],
lb->lb_str[2]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) {
int type = (int) PyToken_ThreeChars(lb->lb_str[1],
lb->lb_str[2],
lb->lb_str[3]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
else
printf("Can't translate STRING label %s\n",
lb->lb_str);
}
else
printf("Can't translate label '%s'\n",
PyGrammar_LabelRepr(lb));
}
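
The construction API above (newgrammar, adddfa, addstate, addarc, addlabel) is normally driven by the parser generator; the sketch below hand-builds a one-rule grammar whose single DFA accepts one NAME token, purely for illustration. An arc labeled EMPTY marks the accepting state, as fixstate() in acceler.c expects, and the EMPTY label must be registered first so it gets index 0.

/* Illustrative only: hand-building a minimal grammar. */
#include "Python.h"
#include "pgenheaders.h"
#include "grammar.h"
#include "token.h"

static grammar *
build_toy_grammar(void)
{
    grammar *g;
    dfa *d;
    int s0, s1, lbl;
    g = newgrammar(NT_OFFSET);                    /* start symbol */
    d = adddfa(g, NT_OFFSET, "toy");
    addlabel(&g->g_ll, ENDMARKER, "EMPTY");       /* label 0 == EMPTY */
    lbl = addlabel(&g->g_ll, NAME, "name");
    s0 = addstate(d);                             /* initial state */
    s1 = addstate(d);                             /* accepting state */
    addarc(d, s0, s1, lbl);
    addarc(d, s1, s1, EMPTY);                     /* EMPTY arc: s1 accepts */
    d->d_initial = s0;
    return g;
}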


@@ -0,0 +1,57 @@
/* Grammar subroutines needed by parser */
#include "Python.h"
#include "pgenheaders.h"
#include "grammar.h"
#include "token.h"
/* Return the DFA for the given type */
dfa *
PyGrammar_FindDFA(grammar *g, register int type)
{
register dfa *d;
#if 1
/* Massive speed-up */
d = &g->g_dfa[type - NT_OFFSET];
assert(d->d_type == type);
return d;
#else
/* Old, slow version */
register int i;
for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) {
if (d->d_type == type)
return d;
}
assert(0);
/* NOTREACHED */
#endif
}
char *
PyGrammar_LabelRepr(label *lb)
{
static char buf[100];
if (lb->lb_type == ENDMARKER)
return "EMPTY";
else if (ISNONTERMINAL(lb->lb_type)) {
if (lb->lb_str == NULL) {
PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type);
return buf;
}
else
return lb->lb_str;
}
else {
if (lb->lb_str == NULL)
return _PyParser_TokenNames[lb->lb_type];
else {
PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)",
_PyParser_TokenNames[lb->lb_type], lb->lb_str);
return buf;
}
}
}


@@ -0,0 +1,178 @@
/* Check for interrupts */
#include "Python.h"
#include "pythread.h"
#ifdef QUICKWIN
#include <io.h>
void
PyOS_InitInterrupts(void)
{
}
void
PyOS_FiniInterrupts(void)
{
}
int
PyOS_InterruptOccurred(void)
{
_wyield();
}
#define OK
#endif /* QUICKWIN */
#if defined(_M_IX86) && !defined(__QNX__)
#include <io.h>
#endif
#if defined(MSDOS) && !defined(QUICKWIN)
#ifdef __GNUC__
/* This is for DJGPP's GO32 extender. I don't know how to trap
* control-C (There's no API for ctrl-C, and I don't want to mess with
* the interrupt vectors.) However, this DOES catch control-break.
* --Amrit
*/
#include <go32.h>
void
PyOS_InitInterrupts(void)
{
_go32_want_ctrl_break(1 /* TRUE */);
}
void
PyOS_FiniInterrupts(void)
{
}
int
PyOS_InterruptOccurred(void)
{
return _go32_was_ctrl_break_hit();
}
#else /* !__GNUC__ */
/* This might work for MS-DOS (untested though): */
void
PyOS_InitInterrupts(void)
{
}
void
PyOS_FiniInterrupts(void)
{
}
int
PyOS_InterruptOccurred(void)
{
int interrupted = 0;
while (kbhit()) {
if (getch() == '\003')
interrupted = 1;
}
return interrupted;
}
#endif /* __GNUC__ */
#define OK
#endif /* MSDOS && !QUICKWIN */
#ifndef OK
/* Default version -- for real operating systems and for Standard C */
#include <stdio.h>
#include <string.h>
#include <signal.h>
static int interrupted;
void
PyErr_SetInterrupt(void)
{
interrupted = 1;
}
extern int PyErr_CheckSignals(void);
static int
checksignals_witharg(void * arg)
{
return PyErr_CheckSignals();
}
static void
intcatcher(int sig)
{
extern void Py_Exit(int);
static char message[] =
"python: to interrupt a truly hanging Python program, interrupt once more.\n";
switch (interrupted++) {
case 0:
break;
case 1:
#ifdef RISCOS
fprintf(stderr, message);
#else
write(2, message, strlen(message));
#endif
break;
case 2:
interrupted = 0;
Py_Exit(1);
break;
}
PyOS_setsig(SIGINT, intcatcher);
Py_AddPendingCall(checksignals_witharg, NULL);
}
static void (*old_siginthandler)(int) = SIG_DFL;
void
PyOS_InitInterrupts(void)
{
if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN)
PyOS_setsig(SIGINT, intcatcher);
}
void
PyOS_FiniInterrupts(void)
{
PyOS_setsig(SIGINT, old_siginthandler);
}
int
PyOS_InterruptOccurred(void)
{
if (!interrupted)
return 0;
interrupted = 0;
return 1;
}
#endif /* !OK */
void
PyOS_AfterFork(void)
{
#ifdef WITH_THREAD
PyEval_ReInitThreads();
PyThread_ReInitTLS();
#endif
}
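
For a host application the contract of this file is small: call PyOS_InitInterrupts() at startup, poll PyOS_InterruptOccurred() inside long-running work (it reports true at most once per interrupt and clears the flag), and call PyOS_FiniInterrupts() at shutdown. A minimal sketch, assuming the default signal-based version above is in use:

/* Illustrative host loop (not part of intrcheck.c). */
#include "Python.h"

static void
run_until_interrupted(void)
{
    PyOS_InitInterrupts();
    for (;;) {
        /* ... perform one unit of work ... */
        if (PyOS_InterruptOccurred())   /* true once per Ctrl-C */
            break;
    }
    PyOS_FiniInterrupts();
}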


@@ -0,0 +1,66 @@
/* List a node on a file */
#include "pgenheaders.h"
#include "token.h"
#include "node.h"
/* Forward */
static void list1node(FILE *, node *);
static void listnode(FILE *, node *);
void
PyNode_ListTree(node *n)
{
listnode(stdout, n);
}
static int level, atbol;
static void
listnode(FILE *fp, node *n)
{
level = 0;
atbol = 1;
list1node(fp, n);
}
static void
list1node(FILE *fp, node *n)
{
if (n == 0)
return;
if (ISNONTERMINAL(TYPE(n))) {
int i;
for (i = 0; i < NCH(n); i++)
list1node(fp, CHILD(n, i));
}
else if (ISTERMINAL(TYPE(n))) {
switch (TYPE(n)) {
case INDENT:
++level;
break;
case DEDENT:
--level;
break;
default:
if (atbol) {
int i;
for (i = 0; i < level; ++i)
fprintf(fp, "\t");
atbol = 0;
}
if (TYPE(n) == NEWLINE) {
if (STR(n) != NULL)
fprintf(fp, "%s", STR(n));
fprintf(fp, "\n");
atbol = 1;
}
else
fprintf(fp, "%s ", STR(n));
break;
}
}
else
fprintf(fp, "? ");
}


@@ -0,0 +1,159 @@
#include "pgenheaders.h"
#include "metagrammar.h"
#include "grammar.h"
#include "pgen.h"
static arc arcs_0_0[3] = {
{2, 0},
{3, 0},
{4, 1},
};
static arc arcs_0_1[1] = {
{0, 1},
};
static state states_0[2] = {
{3, arcs_0_0},
{1, arcs_0_1},
};
static arc arcs_1_0[1] = {
{5, 1},
};
static arc arcs_1_1[1] = {
{6, 2},
};
static arc arcs_1_2[1] = {
{7, 3},
};
static arc arcs_1_3[1] = {
{3, 4},
};
static arc arcs_1_4[1] = {
{0, 4},
};
static state states_1[5] = {
{1, arcs_1_0},
{1, arcs_1_1},
{1, arcs_1_2},
{1, arcs_1_3},
{1, arcs_1_4},
};
static arc arcs_2_0[1] = {
{8, 1},
};
static arc arcs_2_1[2] = {
{9, 0},
{0, 1},
};
static state states_2[2] = {
{1, arcs_2_0},
{2, arcs_2_1},
};
static arc arcs_3_0[1] = {
{10, 1},
};
static arc arcs_3_1[2] = {
{10, 1},
{0, 1},
};
static state states_3[2] = {
{1, arcs_3_0},
{2, arcs_3_1},
};
static arc arcs_4_0[2] = {
{11, 1},
{13, 2},
};
static arc arcs_4_1[1] = {
{7, 3},
};
static arc arcs_4_2[3] = {
{14, 4},
{15, 4},
{0, 2},
};
static arc arcs_4_3[1] = {
{12, 4},
};
static arc arcs_4_4[1] = {
{0, 4},
};
static state states_4[5] = {
{2, arcs_4_0},
{1, arcs_4_1},
{3, arcs_4_2},
{1, arcs_4_3},
{1, arcs_4_4},
};
static arc arcs_5_0[3] = {
{5, 1},
{16, 1},
{17, 2},
};
static arc arcs_5_1[1] = {
{0, 1},
};
static arc arcs_5_2[1] = {
{7, 3},
};
static arc arcs_5_3[1] = {
{18, 1},
};
static state states_5[4] = {
{3, arcs_5_0},
{1, arcs_5_1},
{1, arcs_5_2},
{1, arcs_5_3},
};
static dfa dfas[6] = {
{256, "MSTART", 0, 2, states_0,
"\070\000\000"},
{257, "RULE", 0, 5, states_1,
"\040\000\000"},
{258, "RHS", 0, 2, states_2,
"\040\010\003"},
{259, "ALT", 0, 2, states_3,
"\040\010\003"},
{260, "ITEM", 0, 5, states_4,
"\040\010\003"},
{261, "ATOM", 0, 4, states_5,
"\040\000\003"},
};
static label labels[19] = {
{0, "EMPTY"},
{256, 0},
{257, 0},
{4, 0},
{0, 0},
{1, 0},
{11, 0},
{258, 0},
{259, 0},
{18, 0},
{260, 0},
{9, 0},
{10, 0},
{261, 0},
{16, 0},
{14, 0},
{3, 0},
{7, 0},
{8, 0},
};
static grammar _PyParser_Grammar = {
6,
dfas,
{19, labels},
256
};
grammar *
meta_grammar(void)
{
return &_PyParser_Grammar;
}
grammar *
Py_meta_grammar(void)
{
return meta_grammar();
}


@@ -0,0 +1,221 @@
/* Readline interface for tokenizer.c and [raw_]input() in bltinmodule.c.
By default, or when stdin is not a tty device, we have a super
simple my_readline function using fgets.
Optionally, we can use the GNU readline library.
my_readline() has a different return value from GNU readline():
- NULL if an interrupt occurred or if an error occurred
- a malloc'ed empty string if EOF was read
- a malloc'ed string ending in \n normally
*/
#include "Python.h"
#ifdef MS_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include "windows.h"
#endif /* MS_WINDOWS */
#ifdef __VMS
extern char* vms__StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt);
#endif
PyThreadState* _PyOS_ReadlineTState;
#ifdef WITH_THREAD
#include "pythread.h"
static PyThread_type_lock _PyOS_ReadlineLock = NULL;
#endif
int (*PyOS_InputHook)(void) = NULL;
#ifdef RISCOS
int Py_RISCOSWimpFlag;
#endif
/* This function restarts a fgets() after an EINTR error occurs,
except if PyOS_InterruptOccurred() returns true. */
static int
my_fgets(char *buf, int len, FILE *fp)
{
char *p;
while (1) {
if (PyOS_InputHook != NULL)
(void)(PyOS_InputHook)();
errno = 0;
p = fgets(buf, len, fp);
if (p != NULL)
return 0; /* No error */
#ifdef MS_WINDOWS
/* In the case of a Ctrl+C or some other external event
interrupting the operation:
Win2k/NT: ERROR_OPERATION_ABORTED is the most recent Win32
error code (and feof() returns TRUE).
Win9x: Ctrl+C seems to have no effect on fgets() returning
early - the signal handler is called, but the fgets()
only returns "normally" (ie, when Enter hit or feof())
*/
if (GetLastError()==ERROR_OPERATION_ABORTED) {
/* Signals come asynchronously, so we sleep a brief
moment before checking if the handler has been
triggered (we can't just return 1 before the
signal handler has been called, as the later
signal may be treated as a separate interrupt).
*/
Sleep(1);
if (PyOS_InterruptOccurred()) {
return 1; /* Interrupt */
}
/* Either the sleep wasn't long enough (need a
short loop retrying?) or not interrupted at all
(in which case we should revisit the whole thing!)
Logging some warning would be nice. assert is not
viable as under the debugger, the various dialogs
mean the condition is not true.
*/
}
#endif /* MS_WINDOWS */
if (feof(fp)) {
clearerr(fp);
return -1; /* EOF */
}
#ifdef EINTR
if (errno == EINTR) {
int s;
#ifdef WITH_THREAD
PyEval_RestoreThread(_PyOS_ReadlineTState);
#endif
s = PyErr_CheckSignals();
#ifdef WITH_THREAD
PyEval_SaveThread();
#endif
if (s < 0)
return 1;
/* try again */
continue;
}
#endif
if (PyOS_InterruptOccurred()) {
return 1; /* Interrupt */
}
return -2; /* Error */
}
/* NOTREACHED */
}
/* Readline implementation using fgets() */
char *
PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt)
{
size_t n;
char *p;
n = 100;
if ((p = (char *)PyMem_MALLOC(n)) == NULL)
return NULL;
fflush(sys_stdout);
#ifndef RISCOS
if (prompt)
fprintf(stderr, "%s", prompt);
#else
if (prompt) {
if(Py_RISCOSWimpFlag)
fprintf(stderr, "\x0cr%s\x0c", prompt);
else
fprintf(stderr, "%s", prompt);
}
#endif
fflush(stderr);
switch (my_fgets(p, (int)n, sys_stdin)) {
case 0: /* Normal case */
break;
case 1: /* Interrupt */
PyMem_FREE(p);
return NULL;
case -1: /* EOF */
case -2: /* Error */
default: /* Shouldn't happen */
*p = '\0';
break;
}
n = strlen(p);
while (n > 0 && p[n-1] != '\n') {
size_t incr = n+2;
p = (char *)PyMem_REALLOC(p, n + incr);
if (p == NULL)
return NULL;
if (incr > INT_MAX) {
PyErr_SetString(PyExc_OverflowError, "input line too long");
}
if (my_fgets(p+n, (int)incr, sys_stdin) != 0)
break;
n += strlen(p+n);
}
return (char *)PyMem_REALLOC(p, n+1);
}
/* By initializing this function pointer, systems embedding Python can
override the readline function.
Note: Python expects in return a buffer allocated with PyMem_Malloc. */
char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, char *);
/* Interface used by tokenizer.c and bltinmodule.c */
char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt)
{
char *rv;
if (_PyOS_ReadlineTState == PyThreadState_GET()) {
PyErr_SetString(PyExc_RuntimeError,
"can't re-enter readline");
return NULL;
}
if (PyOS_ReadlineFunctionPointer == NULL) {
#ifdef __VMS
PyOS_ReadlineFunctionPointer = vms__StdioReadline;
#else
PyOS_ReadlineFunctionPointer = PyOS_StdioReadline;
#endif
}
#ifdef WITH_THREAD
if (_PyOS_ReadlineLock == NULL) {
_PyOS_ReadlineLock = PyThread_allocate_lock();
}
#endif
_PyOS_ReadlineTState = PyThreadState_GET();
Py_BEGIN_ALLOW_THREADS
#ifdef WITH_THREAD
PyThread_acquire_lock(_PyOS_ReadlineLock, 1);
#endif
/* This is needed to handle the unlikely case that the
* interpreter is in interactive mode *and* stdin/out are not
* a tty. This can happen, for example if python is run like
* this: python -i < test1.py
*/
if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout)))
rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt);
else
rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout,
prompt);
Py_END_ALLOW_THREADS
#ifdef WITH_THREAD
PyThread_release_lock(_PyOS_ReadlineLock);
#endif
_PyOS_ReadlineTState = NULL;
return rv;
}
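
A caller of PyOS_Readline() must distinguish the three return conventions documented at the top of this file: NULL for interrupt or error, a malloc'ed empty string for EOF, and a malloc'ed string normally ending in '\n'. An illustrative sketch (read_one_line is hypothetical):

/* Illustrative caller (not part of myreadline.c).  Returns -1 on
   interrupt/error, 0 on EOF, 1 after a normal line. */
#include "Python.h"
#include <stdio.h>
#include <string.h>

static int
read_one_line(void)
{
    char *line = PyOS_Readline(stdin, stdout, ">>> ");
    if (line == NULL)
        return -1;              /* interrupted, or out of memory */
    if (line[0] == '\0') {
        PyMem_FREE(line);       /* empty string signals EOF */
        return 0;
    }
    printf("got %lu chars\n", (unsigned long)strlen(line));
    PyMem_FREE(line);           /* buffer came from PyMem_MALLOC */
    return 1;
}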


@@ -0,0 +1,138 @@
/* Parse tree node implementation */
#include "Python.h"
#include "node.h"
#include "errcode.h"
node *
PyNode_New(int type)
{
node *n = (node *) PyObject_MALLOC(1 * sizeof(node));
if (n == NULL)
return NULL;
n->n_type = type;
n->n_str = NULL;
n->n_lineno = 0;
n->n_nchildren = 0;
n->n_child = NULL;
return n;
}
/* See comments at XXXROUNDUP below. Returns -1 on overflow. */
static int
fancy_roundup(int n)
{
/* Round up to the closest power of 2 >= n. */
int result = 256;
assert(n > 128);
while (result < n) {
result <<= 1;
if (result <= 0)
return -1;
}
return result;
}
/* A gimmick to make massive numbers of reallocs quicker. The result is
* a number >= the input. In PyNode_AddChild, it's used like so, when
* we're about to add child number current_size + 1:
*
* if XXXROUNDUP(current_size) < XXXROUNDUP(current_size + 1):
* allocate space for XXXROUNDUP(current_size + 1) total children
* else:
* we already have enough space
*
* Since a node starts out empty, we must have
*
* XXXROUNDUP(0) < XXXROUNDUP(1)
*
* so that we allocate space for the first child. One-child nodes are very
* common (presumably that would change if we used a more abstract form
* of syntax tree), so to avoid wasting memory it's desirable that
* XXXROUNDUP(1) == 1. That in turn forces XXXROUNDUP(0) == 0.
*
* Else for 2 <= n <= 128, we round up to the closest multiple of 4. Why 4?
* Rounding up to a multiple of an exact power of 2 is very efficient, and
* most nodes with more than one child have <= 4 kids.
*
* Else we call fancy_roundup() to grow proportionately to n. We've got an
* extreme case then (like test_longexp.py), and on many platforms doing
* anything less than proportional growth leads to exorbitant runtime
* (e.g., MacPython), or extreme fragmentation of user address space (e.g.,
* Win98).
*
* In a run of compileall across the 2.3a0 Lib directory, Andrew MacIntyre
* reported that, with this scheme, 89% of PyObject_REALLOC calls in
* PyNode_AddChild passed 1 for the size, and 9% passed 4. So this usually
* wastes very little memory, but is very effective at sidestepping
* platform-realloc disasters on vulnerable platforms.
*
* Note that this would be straightforward if a node stored its current
* capacity. The code is tricky to avoid that.
*/
#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \
(n) <= 128 ? (((n) + 3) & ~3) : \
fancy_roundup(n))
int
PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset)
{
const int nch = n1->n_nchildren;
int current_capacity;
int required_capacity;
node *n;
if (nch == INT_MAX || nch < 0)
return E_OVERFLOW;
current_capacity = XXXROUNDUP(nch);
required_capacity = XXXROUNDUP(nch + 1);
if (current_capacity < 0 || required_capacity < 0)
return E_OVERFLOW;
if (current_capacity < required_capacity) {
if (required_capacity > PY_SIZE_MAX / sizeof(node)) {
return E_NOMEM;
}
n = n1->n_child;
n = (node *) PyObject_REALLOC(n,
required_capacity * sizeof(node));
if (n == NULL)
return E_NOMEM;
n1->n_child = n;
}
n = &n1->n_child[n1->n_nchildren++];
n->n_type = type;
n->n_str = str;
n->n_lineno = lineno;
n->n_col_offset = col_offset;
n->n_nchildren = 0;
n->n_child = NULL;
return 0;
}
/* Forward */
static void freechildren(node *);
void
PyNode_Free(node *n)
{
if (n != NULL) {
freechildren(n);
PyObject_FREE(n);
}
}
static void
freechildren(node *n)
{
int i;
for (i = NCH(n); --i >= 0; )
freechildren(CHILD(n, i));
if (n->n_child != NULL)
PyObject_FREE(n->n_child);
if (STR(n) != NULL)
PyObject_FREE(STR(n));
}
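
The comment block above is easier to trust with numbers attached. Below is a standalone check of the growth policy, with XXXROUNDUP and fancy_roundup restated purely for illustration (the original's overflow guard omitted):

/* Standalone illustration of the XXXROUNDUP growth policy. */
#include <stdio.h>

static int
fancy_roundup(int n)            /* overflow check omitted here */
{
    int result = 256;
    while (result < n)
        result <<= 1;
    return result;
}

#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \
                       (n) <= 128 ? (((n) + 3) & ~3) : \
                       fancy_roundup(n))

int
main(void)
{
    int samples[] = {0, 1, 2, 5, 9, 128, 129, 300};
    int i;
    for (i = 0; i < 8; i++)
        printf("XXXROUNDUP(%d) = %d\n", samples[i], XXXROUNDUP(samples[i]));
    /* prints 0, 1, 4, 8, 12, 128, 256, 512: capacity changes only when a
       boundary is crossed, so most PyNode_AddChild calls skip realloc */
    return 0;
}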


@@ -0,0 +1,436 @@
/* Parser implementation */
/* For a description, see the comments at end of this file */
/* XXX To do: error recovery */
#include "Python.h"
#include "pgenheaders.h"
#include "token.h"
#include "grammar.h"
#include "node.h"
#include "parser.h"
#include "errcode.h"
#ifdef Py_DEBUG
extern int Py_DebugFlag;
#define D(x) if (!Py_DebugFlag); else x
#else
#define D(x)
#endif
/* STACK DATA TYPE */
static void s_reset(stack *);
static void
s_reset(stack *s)
{
s->s_top = &s->s_base[MAXSTACK];
}
#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
static int
s_push(register stack *s, dfa *d, node *parent)
{
register stackentry *top;
if (s->s_top == s->s_base) {
fprintf(stderr, "s_push: parser stack overflow\n");
return E_NOMEM;
}
top = --s->s_top;
top->s_dfa = d;
top->s_parent = parent;
top->s_state = 0;
return 0;
}
#ifdef Py_DEBUG
static void
s_pop(register stack *s)
{
if (s_empty(s))
Py_FatalError("s_pop: parser stack underflow -- FATAL");
s->s_top++;
}
#else /* !Py_DEBUG */
#define s_pop(s) (s)->s_top++
#endif
/* PARSER CREATION */
parser_state *
PyParser_New(grammar *g, int start)
{
parser_state *ps;
if (!g->g_accel)
PyGrammar_AddAccelerators(g);
ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
if (ps == NULL)
return NULL;
ps->p_grammar = g;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
ps->p_flags = 0;
#endif
ps->p_tree = PyNode_New(start);
if (ps->p_tree == NULL) {
PyMem_FREE(ps);
return NULL;
}
s_reset(&ps->p_stack);
(void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
return ps;
}
void
PyParser_Delete(parser_state *ps)
{
/* NB If you want to save the parse tree,
you must set p_tree to NULL before calling delparser! */
PyNode_Free(ps->p_tree);
PyMem_FREE(ps);
}
/* PARSER STACK OPERATIONS */
static int
shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
{
int err;
assert(!s_empty(s));
err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
if (err)
return err;
s->s_top->s_state = newstate;
return 0;
}
static int
push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
{
int err;
register node *n;
n = s->s_top->s_parent;
assert(!s_empty(s));
err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
if (err)
return err;
s->s_top->s_state = newstate;
return s_push(s, d, CHILD(n, NCH(n)-1));
}
/* PARSER PROPER */
static int
classify(parser_state *ps, int type, char *str)
{
grammar *g = ps->p_grammar;
register int n = g->g_ll.ll_nlabels;
if (type == NAME) {
register char *s = str;
register label *l = g->g_ll.ll_label;
register int i;
for (i = n; i > 0; i--, l++) {
if (l->lb_type != NAME || l->lb_str == NULL ||
l->lb_str[0] != s[0] ||
strcmp(l->lb_str, s) != 0)
continue;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
s[0] == 'p' && strcmp(s, "print") == 0) {
break; /* no longer a keyword */
}
#endif
D(printf("It's a keyword\n"));
return n - i;
}
}
{
register label *l = g->g_ll.ll_label;
register int i;
for (i = n; i > 0; i--, l++) {
if (l->lb_type == type && l->lb_str == NULL) {
D(printf("It's a token we know\n"));
return n - i;
}
}
}
D(printf("Illegal token\n"));
return -1;
}
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
static void
future_hack(parser_state *ps)
{
node *n = ps->p_stack.s_top->s_parent;
node *ch, *cch;
int i;
/* from __future__ import ..., must have at least 4 children */
n = CHILD(n, 0);
if (NCH(n) < 4)
return;
ch = CHILD(n, 0);
if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
return;
ch = CHILD(n, 1);
if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
return;
ch = CHILD(n, 3);
/* ch can be a star, a parenthesis or import_as_names */
if (TYPE(ch) == STAR)
return;
if (TYPE(ch) == LPAR)
ch = CHILD(n, 4);
for (i = 0; i < NCH(ch); i += 2) {
cch = CHILD(ch, i);
if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
char *str_ch = STR(CHILD(cch, 0));
if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
} else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
} else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
}
}
}
}
#endif /* future keyword */
int
PyParser_AddToken(register parser_state *ps, register int type, char *str,
int lineno, int col_offset, int *expected_ret)
{
register int ilabel;
int err;
D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
/* Find out which label this token is */
ilabel = classify(ps, type, str);
if (ilabel < 0)
return E_SYNTAX;
/* Loop until the token is shifted or an error occurred */
for (;;) {
/* Fetch the current dfa and state */
register dfa *d = ps->p_stack.s_top->s_dfa;
register state *s = &d->d_state[ps->p_stack.s_top->s_state];
D(printf(" DFA '%s', state %d:",
d->d_name, ps->p_stack.s_top->s_state));
/* Check accelerator */
if (s->s_lower <= ilabel && ilabel < s->s_upper) {
register int x = s->s_accel[ilabel - s->s_lower];
if (x != -1) {
if (x & (1<<7)) {
/* Push non-terminal */
int nt = (x >> 8) + NT_OFFSET;
int arrow = x & ((1<<7)-1);
dfa *d1 = PyGrammar_FindDFA(
ps->p_grammar, nt);
if ((err = push(&ps->p_stack, nt, d1,
arrow, lineno, col_offset)) > 0) {
D(printf(" MemError: push\n"));
return err;
}
D(printf(" Push ...\n"));
continue;
}
/* Shift the token */
if ((err = shift(&ps->p_stack, type, str,
x, lineno, col_offset)) > 0) {
D(printf(" MemError: shift.\n"));
return err;
}
D(printf(" Shift.\n"));
/* Pop while we are in an accept-only state */
while (s = &d->d_state
[ps->p_stack.s_top->s_state],
s->s_accept && s->s_narcs == 1) {
D(printf(" DFA '%s', state %d: "
"Direct pop.\n",
d->d_name,
ps->p_stack.s_top->s_state));
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (d->d_name[0] == 'i' &&
strcmp(d->d_name,
"import_stmt") == 0)
future_hack(ps);
#endif
s_pop(&ps->p_stack);
if (s_empty(&ps->p_stack)) {
D(printf(" ACCEPT.\n"));
return E_DONE;
}
d = ps->p_stack.s_top->s_dfa;
}
return E_OK;
}
}
if (s->s_accept) {
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (d->d_name[0] == 'i' &&
strcmp(d->d_name, "import_stmt") == 0)
future_hack(ps);
#endif
/* Pop this dfa and try again */
s_pop(&ps->p_stack);
D(printf(" Pop ...\n"));
if (s_empty(&ps->p_stack)) {
D(printf(" Error: bottom of stack.\n"));
return E_SYNTAX;
}
continue;
}
/* Stuck, report syntax error */
D(printf(" Error.\n"));
if (expected_ret) {
if (s->s_lower == s->s_upper - 1) {
/* Only one possible expected token */
*expected_ret = ps->p_grammar->
g_ll.ll_label[s->s_lower].lb_type;
}
else
*expected_ret = -1;
}
return E_SYNTAX;
}
}
#ifdef Py_DEBUG
/* DEBUG OUTPUT */
void
dumptree(grammar *g, node *n)
{
int i;
if (n == NULL)
printf("NIL");
else {
label l;
l.lb_type = TYPE(n);
l.lb_str = STR(n);
printf("%s", PyGrammar_LabelRepr(&l));
if (ISNONTERMINAL(TYPE(n))) {
printf("(");
for (i = 0; i < NCH(n); i++) {
if (i > 0)
printf(",");
dumptree(g, CHILD(n, i));
}
printf(")");
}
}
}
void
showtree(grammar *g, node *n)
{
int i;
if (n == NULL)
return;
if (ISNONTERMINAL(TYPE(n))) {
for (i = 0; i < NCH(n); i++)
showtree(g, CHILD(n, i));
}
else if (ISTERMINAL(TYPE(n))) {
printf("%s", _PyParser_TokenNames[TYPE(n)]);
if (TYPE(n) == NUMBER || TYPE(n) == NAME)
printf("(%s)", STR(n));
printf(" ");
}
else
printf("? ");
}
void
printtree(parser_state *ps)
{
if (Py_DebugFlag) {
printf("Parse tree:\n");
dumptree(ps->p_grammar, ps->p_tree);
printf("\n");
printf("Tokens:\n");
showtree(ps->p_grammar, ps->p_tree);
printf("\n");
}
printf("Listing:\n");
PyNode_ListTree(ps->p_tree);
printf("\n");
}
#endif /* Py_DEBUG */
/*
Description
-----------
The parser's interface is different from usual: the function addtoken()
must be called for each token in the input. This makes it possible to
turn it into an incremental parsing system later. The parsing system
constructs a parse tree as it goes.
A parsing rule is represented as a Deterministic Finite-state Automaton
(DFA). A node in a DFA represents a state of the parser; an arc represents
a transition. Transitions are either labeled with terminal symbols or
with non-terminals. When the parser decides to follow an arc labeled
with a non-terminal, it is invoked recursively with the DFA representing
the parsing rule for that as its initial state; when that DFA accepts,
the parser that invoked it continues. The parse tree constructed by the
recursively called parser is inserted as a child in the current parse tree.
The DFAs can be constructed automatically from a more conventional
language description. An extended LL(1) grammar (ELL(1)) is suitable.
Certain restrictions make the parser's life easier: rules that can produce
the empty string should be outlawed (there are other ways to put loops
or optional parts in the language). To avoid the need to construct
FIRST sets, we can require that all but the last alternative of a rule
(really: arc going out of a DFA's state) must begin with a terminal
symbol.
As an example, consider this grammar:
expr: term (OP term)*
term: CONSTANT | '(' expr ')'
The DFA corresponding to the rule for expr is:
------->.---term-->.------->
^ |
| |
\----OP----/
The parse tree generated for the input a+b is:
(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
*/
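
A condensed, illustrative driver for the token-at-a-time interface described above; the production version is parsetok() in parsetok.c, which follows in this commit. The callback type and the name run_parser are hypothetical, and per-token string ownership and error reporting are elided.

/* Illustrative driver (not part of parser.c). */
#include "Python.h"
#include "pgenheaders.h"
#include "grammar.h"
#include "node.h"
#include "parser.h"
#include "errcode.h"

typedef int (*next_token_fn)(int *type, char **str, int *lineno, int *col);

static node *
run_parser(grammar *g, int start, next_token_fn next_token)
{
    parser_state *ps = PyParser_New(g, start);
    node *tree = NULL;
    if (ps == NULL)
        return NULL;
    for (;;) {
        int type, lineno, col, expected, err;
        char *str;
        if (next_token(&type, &str, &lineno, &col) < 0)
            break;                          /* tokenizer failed */
        err = PyParser_AddToken(ps, type, str, lineno, col, &expected);
        if (err == E_DONE) {                /* start symbol accepted */
            tree = ps->p_tree;
            ps->p_tree = NULL;              /* keep tree past delete */
            break;
        }
        if (err != E_OK)
            break;                          /* E_SYNTAX, E_NOMEM, ... */
    }
    PyParser_Delete(ps);
    return tree;
}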


@@ -0,0 +1,42 @@
#ifndef Py_PARSER_H
#define Py_PARSER_H
#ifdef __cplusplus
extern "C" {
#endif
/* Parser interface */
#define MAXSTACK 1500
typedef struct {
int s_state; /* State in current DFA */
dfa *s_dfa; /* Current DFA */
struct _node *s_parent; /* Where to add next node */
} stackentry;
typedef struct {
stackentry *s_top; /* Top entry */
stackentry s_base[MAXSTACK];/* Array of stack entries */
/* NB The stack grows down */
} stack;
typedef struct {
stack p_stack; /* Stack of parser states */
grammar *p_grammar; /* Grammar to use */
node *p_tree; /* Top of parse tree */
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
unsigned long p_flags; /* see co_flags in Include/code.h */
#endif
} parser_state;
parser_state *PyParser_New(grammar *g, int start);
void PyParser_Delete(parser_state *ps);
int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset,
int *expected_ret);
void PyGrammar_AddAccelerators(grammar *g);
#ifdef __cplusplus
}
#endif
#endif /* !Py_PARSER_H */


@@ -0,0 +1,283 @@
/* Parser-tokenizer link implementation */
#include "pgenheaders.h"
#include "tokenizer.h"
#include "node.h"
#include "grammar.h"
#include "parser.h"
#include "parsetok.h"
#include "errcode.h"
#include "graminit.h"
int Py_TabcheckFlag;
/* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static void initerr(perrdetail *err_ret, const char* filename);
/* Parse input coming from a string. Return error code, print some errors. */
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
{
return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
}
node *
PyParser_ParseStringFlags(const char *s, grammar *g, int start,
perrdetail *err_ret, int flags)
{
return PyParser_ParseStringFlagsFilename(s, NULL,
g, start, err_ret, flags);
}
node *
PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
grammar *g, int start,
perrdetail *err_ret, int flags)
{
int iflags = flags;
return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
err_ret, &iflags);
}
node *
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
grammar *g, int start,
perrdetail *err_ret, int *flags)
{
struct tok_state *tok;
initerr(err_ret, filename);
if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) {
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
tok->filename = filename ? filename : "<string>";
if (Py_TabcheckFlag || Py_VerboseFlag) {
tok->altwarning = (tok->filename != NULL);
if (Py_TabcheckFlag >= 2)
tok->alterror++;
}
return parsetok(tok, g, start, err_ret, flags);
}
/* Parse input coming from a file. Return error code, print some errors. */
node *
PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
char *ps1, char *ps2, perrdetail *err_ret)
{
return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2,
err_ret, 0);
}
node *
PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start,
char *ps1, char *ps2, perrdetail *err_ret, int flags)
{
int iflags = flags;
return PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags);
}
node *
PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start,
char *ps1, char *ps2, perrdetail *err_ret, int *flags)
{
struct tok_state *tok;
initerr(err_ret, filename);
if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) {
err_ret->error = E_NOMEM;
return NULL;
}
tok->filename = filename;
if (Py_TabcheckFlag || Py_VerboseFlag) {
tok->altwarning = (filename != NULL);
if (Py_TabcheckFlag >= 2)
tok->alterror++;
}
return parsetok(tok, g, start, err_ret, flags);
}
#if 0
static char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";
static char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";
static void
warn(const char *msg, const char *filename, int lineno)
{
if (filename == NULL)
filename = "<string>";
PySys_WriteStderr(msg, filename, lineno);
}
#endif
/* Parse input coming from the given tokenizer structure.
Return error code. */
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
int *flags)
{
parser_state *ps;
node *n;
int started = 0, handling_import = 0, handling_with = 0;
if ((ps = PyParser_New(g, start)) == NULL) {
fprintf(stderr, "no mem for new parser\n");
err_ret->error = E_NOMEM;
PyTokenizer_Free(tok);
return NULL;
}
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (*flags & PyPARSE_PRINT_IS_FUNCTION) {
ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
}
if (*flags & PyPARSE_UNICODE_LITERALS) {
ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
}
#endif
for (;;) {
char *a, *b;
int type;
size_t len;
char *str;
int col_offset;
type = PyTokenizer_Get(tok, &a, &b);
if (type == ERRORTOKEN) {
err_ret->error = tok->done;
break;
}
if (type == ENDMARKER && started) {
type = NEWLINE; /* Add an extra newline */
handling_with = handling_import = 0;
started = 0;
/* Add the right number of dedent tokens,
except if a certain flag is given --
codeop.py uses this. */
if (tok->indent &&
!(*flags & PyPARSE_DONT_IMPLY_DEDENT))
{
tok->pendin = -tok->indent;
tok->indent = 0;
}
}
else
started = 1;
len = b - a; /* XXX this may compute NULL - NULL */
str = (char *) PyObject_MALLOC(len + 1);
if (str == NULL) {
fprintf(stderr, "no mem for next token\n");
err_ret->error = E_NOMEM;
break;
}
if (len > 0)
strncpy(str, a, len);
str[len] = '\0';
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#endif
if (a >= tok->line_start)
col_offset = a - tok->line_start;
else
col_offset = -1;
if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
&(err_ret->expected))) != E_OK) {
if (err_ret->error != E_DONE) {
PyObject_FREE(str);
err_ret->token = type;
}
break;
}
}
if (err_ret->error == E_DONE) {
n = ps->p_tree;
ps->p_tree = NULL;
}
else
n = NULL;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
*flags = ps->p_flags;
#endif
PyParser_Delete(ps);
if (n == NULL) {
if (tok->lineno <= 1 && tok->done == E_EOF)
err_ret->error = E_EOF;
err_ret->lineno = tok->lineno;
if (tok->buf != NULL) {
char *text = NULL;
size_t len;
assert(tok->cur - tok->buf < INT_MAX);
err_ret->offset = (int)(tok->cur - tok->buf);
len = tok->inp - tok->buf;
#ifdef Py_USING_UNICODE
text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset);
#endif
if (text == NULL) {
text = (char *) PyObject_MALLOC(len + 1);
if (text != NULL) {
if (len > 0)
strncpy(text, tok->buf, len);
text[len] = '\0';
}
}
err_ret->text = text;
}
} else if (tok->encoding != NULL) {
/* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
* allocated using PyMem_
*/
node* r = PyNode_New(encoding_decl);
if (r)
r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
if (!r || !r->n_str) {
err_ret->error = E_NOMEM;
if (r)
PyObject_FREE(r);
n = NULL;
goto done;
}
strcpy(r->n_str, tok->encoding);
PyMem_FREE(tok->encoding);
tok->encoding = NULL;
r->n_nchildren = 1;
r->n_child = n;
n = r;
}
done:
PyTokenizer_Free(tok);
return n;
}
static void
initerr(perrdetail *err_ret, const char *filename)
{
err_ret->error = E_OK;
err_ret->filename = filename;
err_ret->lineno = 0;
err_ret->offset = 0;
err_ret->text = NULL;
err_ret->token = -1;
err_ret->expected = -1;
}


@@ -0,0 +1,708 @@
/* Parser generator */
/* For a description, see the comments at end of this file */
#include "Python.h"
#include "pgenheaders.h"
#include "token.h"
#include "node.h"
#include "grammar.h"
#include "metagrammar.h"
#include "pgen.h"
extern int Py_DebugFlag;
extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */
/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
typedef struct _nfaarc {
int ar_label;
int ar_arrow;
} nfaarc;
typedef struct _nfastate {
int st_narcs;
nfaarc *st_arc;
} nfastate;
typedef struct _nfa {
int nf_type;
char *nf_name;
int nf_nstates;
nfastate *nf_state;
int nf_start, nf_finish;
} nfa;
/* Forward */
static void compile_rhs(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static void compile_alt(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static void compile_item(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static void compile_atom(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static int
addnfastate(nfa *nf)
{
nfastate *st;
nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state,
sizeof(nfastate) * (nf->nf_nstates + 1));
if (nf->nf_state == NULL)
Py_FatalError("out of mem");
st = &nf->nf_state[nf->nf_nstates++];
st->st_narcs = 0;
st->st_arc = NULL;
return st - nf->nf_state;
}
static void
addnfaarc(nfa *nf, int from, int to, int lbl)
{
nfastate *st;
nfaarc *ar;
st = &nf->nf_state[from];
st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc,
sizeof(nfaarc) * (st->st_narcs + 1));
if (st->st_arc == NULL)
Py_FatalError("out of mem");
ar = &st->st_arc[st->st_narcs++];
ar->ar_label = lbl;
ar->ar_arrow = to;
}
static nfa *
newnfa(char *name)
{
nfa *nf;
static int type = NT_OFFSET; /* All types will be disjoint */
nf = (nfa *)PyObject_MALLOC(sizeof(nfa));
if (nf == NULL)
Py_FatalError("no mem for new nfa");
nf->nf_type = type++;
nf->nf_name = name; /* XXX strdup(name) ??? */
nf->nf_nstates = 0;
nf->nf_state = NULL;
nf->nf_start = nf->nf_finish = -1;
return nf;
}
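/* A hedged, illustrative sketch (not part of the original file): wiring
   the two-state NFA for a one-token rule such as "thing: NAME" with the
   helpers above; compile_atom() below does the same for NAME atoms.
   The function name is hypothetical and the NFA is never registered. */
static void
example_one_token_nfa(labellist *ll)
{
    nfa *nf = newnfa("thing");
    nf->nf_start = addnfastate(nf);
    nf->nf_finish = addnfastate(nf);
    /* One arc from start to finish, labeled with the referenced token. */
    addnfaarc(nf, nf->nf_start, nf->nf_finish, addlabel(ll, NAME, "NAME"));
}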
typedef struct _nfagrammar {
int gr_nnfas;
nfa **gr_nfa;
labellist gr_ll;
} nfagrammar;
/* Forward */
static void compile_rule(nfagrammar *gr, node *n);
static nfagrammar *
newnfagrammar(void)
{
nfagrammar *gr;
gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));
if (gr == NULL)
Py_FatalError("no mem for new nfa grammar");
gr->gr_nnfas = 0;
gr->gr_nfa = NULL;
gr->gr_ll.ll_nlabels = 0;
gr->gr_ll.ll_label = NULL;
addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
return gr;
}
static nfa *
addnfa(nfagrammar *gr, char *name)
{
nfa *nf;
nf = newnfa(name);
gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,
sizeof(nfa*) * (gr->gr_nnfas + 1));
if (gr->gr_nfa == NULL)
Py_FatalError("out of mem");
gr->gr_nfa[gr->gr_nnfas++] = nf;
addlabel(&gr->gr_ll, NAME, nf->nf_name);
return nf;
}
#ifdef Py_DEBUG
static char REQNFMT[] = "metacompile: less than %d children\n";
#define REQN(i, count) \
if (i < count) { \
fprintf(stderr, REQNFMT, count); \
Py_FatalError("REQN"); \
} else
#else
#define REQN(i, count) /* empty */
#endif
static nfagrammar *
metacompile(node *n)
{
nfagrammar *gr;
int i;
if (Py_DebugFlag)
printf("Compiling (meta-) parse tree into NFA grammar\n");
gr = newnfagrammar();
REQ(n, MSTART);
i = n->n_nchildren - 1; /* Last child is ENDMARKER */
n = n->n_child;
for (; --i >= 0; n++) {
if (n->n_type != NEWLINE)
compile_rule(gr, n);
}
return gr;
}
static void
compile_rule(nfagrammar *gr, node *n)
{
nfa *nf;
REQ(n, RULE);
REQN(n->n_nchildren, 4);
n = n->n_child;
REQ(n, NAME);
nf = addnfa(gr, n->n_str);
n++;
REQ(n, COLON);
n++;
REQ(n, RHS);
compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
n++;
REQ(n, NEWLINE);
}
static void
compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
int a, b;
REQ(n, RHS);
i = n->n_nchildren;
REQN(i, 1);
n = n->n_child;
REQ(n, ALT);
compile_alt(ll, nf, n, pa, pb);
if (--i <= 0)
return;
n++;
a = *pa;
b = *pb;
*pa = addnfastate(nf);
*pb = addnfastate(nf);
addnfaarc(nf, *pa, a, EMPTY);
addnfaarc(nf, b, *pb, EMPTY);
for (; --i >= 0; n++) {
REQ(n, VBAR);
REQN(i, 1);
--i;
n++;
REQ(n, ALT);
compile_alt(ll, nf, n, &a, &b);
addnfaarc(nf, *pa, a, EMPTY);
addnfaarc(nf, b, *pb, EMPTY);
}
}
static void
compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
int a, b;
REQ(n, ALT);
i = n->n_nchildren;
REQN(i, 1);
n = n->n_child;
REQ(n, ITEM);
compile_item(ll, nf, n, pa, pb);
--i;
n++;
for (; --i >= 0; n++) {
REQ(n, ITEM);
compile_item(ll, nf, n, &a, &b);
addnfaarc(nf, *pb, a, EMPTY);
*pb = b;
}
}
static void
compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
int a, b;
REQ(n, ITEM);
i = n->n_nchildren;
REQN(i, 1);
n = n->n_child;
if (n->n_type == LSQB) {
REQN(i, 3);
n++;
REQ(n, RHS);
*pa = addnfastate(nf);
*pb = addnfastate(nf);
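        /* The direct EMPTY arc from the new start to the new finish
           is what makes the bracketed part optional. */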
addnfaarc(nf, *pa, *pb, EMPTY);
compile_rhs(ll, nf, n, &a, &b);
addnfaarc(nf, *pa, a, EMPTY);
addnfaarc(nf, b, *pb, EMPTY);
REQN(i, 1);
n++;
REQ(n, RSQB);
}
else {
compile_atom(ll, nf, n, pa, pb);
if (--i <= 0)
return;
n++;
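        /* Loop back from finish to start for repetition; for '*' the
           finish collapses onto the start so zero occurrences match,
           while '+' keeps them distinct and so requires at least one. */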
addnfaarc(nf, *pb, *pa, EMPTY);
if (n->n_type == STAR)
*pb = *pa;
else
REQ(n, PLUS);
}
}
static void
compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
REQ(n, ATOM);
i = n->n_nchildren;
REQN(i, 1);
n = n->n_child;
if (n->n_type == LPAR) {
REQN(i, 3);
n++;
REQ(n, RHS);
compile_rhs(ll, nf, n, pa, pb);
n++;
REQ(n, RPAR);
}
else if (n->n_type == NAME || n->n_type == STRING) {
*pa = addnfastate(nf);
*pb = addnfastate(nf);
addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
}
else
REQ(n, NAME);
}
static void
dumpstate(labellist *ll, nfa *nf, int istate)
{
nfastate *st;
int i;
nfaarc *ar;
printf("%c%2d%c",
istate == nf->nf_start ? '*' : ' ',
istate,
istate == nf->nf_finish ? '.' : ' ');
st = &nf->nf_state[istate];
ar = st->st_arc;
for (i = 0; i < st->st_narcs; i++) {
if (i > 0)
printf("\n ");
printf("-> %2d %s", ar->ar_arrow,
PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
ar++;
}
printf("\n");
}
static void
dumpnfa(labellist *ll, nfa *nf)
{
int i;
printf("NFA '%s' has %d states; start %d, finish %d\n",
nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
for (i = 0; i < nf->nf_nstates; i++)
dumpstate(ll, nf, i);
}
/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
static void
addclosure(bitset ss, nfa *nf, int istate)
{
if (addbit(ss, istate)) {
nfastate *st = &nf->nf_state[istate];
nfaarc *ar = st->st_arc;
int i;
for (i = st->st_narcs; --i >= 0; ) {
if (ar->ar_label == EMPTY)
addclosure(ss, nf, ar->ar_arrow);
ar++;
}
}
}
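/* Illustration (hedged): if state 0 has EMPTY arcs to states 1 and 2,
   and state 2 has an EMPTY arc to state 3, then addclosure(ss, nf, 0)
   leaves the bits {0, 1, 2, 3} set in ss. */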
typedef struct _ss_arc {
bitset sa_bitset;
int sa_arrow;
int sa_label;
} ss_arc;
typedef struct _ss_state {
bitset ss_ss;
int ss_narcs;
struct _ss_arc *ss_arc;
int ss_deleted;
int ss_finish;
int ss_rename;
} ss_state;
typedef struct _ss_dfa {
int sd_nstates;
ss_state *sd_state;
} ss_dfa;
/* Forward */
static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
labellist *ll, char *msg);
static void simplify(int xx_nstates, ss_state *xx_state);
static void convert(dfa *d, int xx_nstates, ss_state *xx_state);
static void
makedfa(nfagrammar *gr, nfa *nf, dfa *d)
{
int nbits = nf->nf_nstates;
bitset ss;
int xx_nstates;
ss_state *xx_state, *yy;
ss_arc *zz;
int istate, jstate, iarc, jarc, ibit;
nfastate *st;
nfaarc *ar;
ss = newbitset(nbits);
addclosure(ss, nf, nf->nf_start);
xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state));
if (xx_state == NULL)
Py_FatalError("no mem for xx_state in makedfa");
xx_nstates = 1;
yy = &xx_state[0];
yy->ss_ss = ss;
yy->ss_narcs = 0;
yy->ss_arc = NULL;
yy->ss_deleted = 0;
yy->ss_finish = testbit(ss, nf->nf_finish);
if (yy->ss_finish)
printf("Error: nonterminal '%s' may produce empty.\n",
nf->nf_name);
/* This algorithm is from a book written before
the invention of structured programming... */
/* For each unmarked state... */
for (istate = 0; istate < xx_nstates; ++istate) {
size_t size;
yy = &xx_state[istate];
ss = yy->ss_ss;
/* For all its states... */
for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
if (!testbit(ss, ibit))
continue;
st = &nf->nf_state[ibit];
/* For all non-empty arcs from this state... */
for (iarc = 0; iarc < st->st_narcs; iarc++) {
ar = &st->st_arc[iarc];
if (ar->ar_label == EMPTY)
continue;
/* Look up in list of arcs from this state */
for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
zz = &yy->ss_arc[jarc];
if (ar->ar_label == zz->sa_label)
goto found;
}
/* Add new arc for this state */
size = sizeof(ss_arc) * (yy->ss_narcs + 1);
yy->ss_arc = (ss_arc *)PyObject_REALLOC(
yy->ss_arc, size);
if (yy->ss_arc == NULL)
Py_FatalError("out of mem");
zz = &yy->ss_arc[yy->ss_narcs++];
zz->sa_label = ar->ar_label;
zz->sa_bitset = newbitset(nbits);
zz->sa_arrow = -1;
found: ;
/* Add destination */
addclosure(zz->sa_bitset, nf, ar->ar_arrow);
}
}
/* Now look up all the arrow states */
for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
zz = &xx_state[istate].ss_arc[jarc];
for (jstate = 0; jstate < xx_nstates; jstate++) {
if (samebitset(zz->sa_bitset,
xx_state[jstate].ss_ss, nbits)) {
zz->sa_arrow = jstate;
goto done;
}
}
size = sizeof(ss_state) * (xx_nstates + 1);
xx_state = (ss_state *)PyObject_REALLOC(xx_state,
size);
if (xx_state == NULL)
Py_FatalError("out of mem");
zz->sa_arrow = xx_nstates;
yy = &xx_state[xx_nstates++];
yy->ss_ss = zz->sa_bitset;
yy->ss_narcs = 0;
yy->ss_arc = NULL;
yy->ss_deleted = 0;
yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
done: ;
}
}
if (Py_DebugFlag)
printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
"before minimizing");
simplify(xx_nstates, xx_state);
if (Py_DebugFlag)
printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
"after minimizing");
convert(d, xx_nstates, xx_state);
/* XXX cleanup */
PyObject_FREE(xx_state);
}
static void
printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
labellist *ll, char *msg)
{
int i, ibit, iarc;
ss_state *yy;
ss_arc *zz;
printf("Subset DFA %s\n", msg);
for (i = 0; i < xx_nstates; i++) {
yy = &xx_state[i];
if (yy->ss_deleted)
continue;
printf(" Subset %d", i);
if (yy->ss_finish)
printf(" (finish)");
printf(" { ");
for (ibit = 0; ibit < nbits; ibit++) {
if (testbit(yy->ss_ss, ibit))
printf("%d ", ibit);
}
printf("}\n");
for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
zz = &yy->ss_arc[iarc];
printf(" Arc to state %d, label %s\n",
zz->sa_arrow,
PyGrammar_LabelRepr(
&ll->ll_label[zz->sa_label]));
}
}
}
/* PART THREE -- SIMPLIFY DFA */
/* Simplify the DFA by repeatedly eliminating states that are
   equivalent to another one. This is NOT Algorithm 3.3 from
   [Aho&Ullman 77]. It does not always find the minimal DFA,
but it does usually make a much smaller one... (For an example
of sub-optimal behavior, try S: x a b+ | y a b+.)
*/
static int
samestate(ss_state *s1, ss_state *s2)
{
int i;
if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
return 0;
for (i = 0; i < s1->ss_narcs; i++) {
if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
return 0;
}
return 1;
}
static void
renamestates(int xx_nstates, ss_state *xx_state, int from, int to)
{
int i, j;
if (Py_DebugFlag)
printf("Rename state %d to %d.\n", from, to);
for (i = 0; i < xx_nstates; i++) {
if (xx_state[i].ss_deleted)
continue;
for (j = 0; j < xx_state[i].ss_narcs; j++) {
if (xx_state[i].ss_arc[j].sa_arrow == from)
xx_state[i].ss_arc[j].sa_arrow = to;
}
}
}
static void
simplify(int xx_nstates, ss_state *xx_state)
{
int changes;
int i, j;
do {
changes = 0;
for (i = 1; i < xx_nstates; i++) {
if (xx_state[i].ss_deleted)
continue;
for (j = 0; j < i; j++) {
if (xx_state[j].ss_deleted)
continue;
if (samestate(&xx_state[i], &xx_state[j])) {
xx_state[i].ss_deleted++;
renamestates(xx_nstates, xx_state,
i, j);
changes++;
break;
}
}
}
} while (changes);
}
/* PART FOUR -- GENERATE PARSING TABLES */
/* Convert the DFA into a grammar that can be used by our parser */
static void
convert(dfa *d, int xx_nstates, ss_state *xx_state)
{
int i, j;
ss_state *yy;
ss_arc *zz;
for (i = 0; i < xx_nstates; i++) {
yy = &xx_state[i];
if (yy->ss_deleted)
continue;
yy->ss_rename = addstate(d);
}
for (i = 0; i < xx_nstates; i++) {
yy = &xx_state[i];
if (yy->ss_deleted)
continue;
for (j = 0; j < yy->ss_narcs; j++) {
zz = &yy->ss_arc[j];
addarc(d, yy->ss_rename,
xx_state[zz->sa_arrow].ss_rename,
zz->sa_label);
}
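        /* An accepting subset state is recorded as an arc from the DFA
           state to itself with label 0 (EMPTY); the parser proper then
           treats such a self-arc as its "accept" marker. */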
if (yy->ss_finish)
addarc(d, yy->ss_rename, yy->ss_rename, 0);
}
d->d_initial = 0;
}
/* PART FIVE -- GLUE IT ALL TOGETHER */
static grammar *
maketables(nfagrammar *gr)
{
int i;
nfa *nf;
dfa *d;
grammar *g;
if (gr->gr_nnfas == 0)
return NULL;
g = newgrammar(gr->gr_nfa[0]->nf_type);
/* XXX first rule must be start rule */
g->g_ll = gr->gr_ll;
for (i = 0; i < gr->gr_nnfas; i++) {
nf = gr->gr_nfa[i];
if (Py_DebugFlag) {
printf("Dump of NFA for '%s' ...\n", nf->nf_name);
dumpnfa(&gr->gr_ll, nf);
printf("Making DFA for '%s' ...\n", nf->nf_name);
}
d = adddfa(g, nf->nf_type, nf->nf_name);
makedfa(gr, gr->gr_nfa[i], d);
}
return g;
}
grammar *
pgen(node *n)
{
nfagrammar *gr;
grammar *g;
gr = metacompile(n);
g = maketables(gr);
translatelabels(g);
addfirstsets(g);
PyObject_FREE(gr);
return g;
}
grammar *
Py_pgen(node *n)
{
return pgen(n);
}
/*
Description
-----------
Input is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and
() for grouping). This has already been parsed and turned into a parse
tree.
Each rule is considered as a regular expression in its own right.
It is turned into a Non-deterministic Finite Automaton (NFA), which
is then turned into a Deterministic Finite Automaton (DFA), which is then
optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3,
or similar compiler books (this technique is more often used for lexical
analyzers).
The DFAs are used by the parser as parsing tables in a special way
that's probably unique. Before they are usable, the FIRST sets of all
non-terminals are computed.
Reference
---------
[Aho&Ullman 77]
Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
(first edition)
*/
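/* An illustrative walk-through (hedged; not part of the original file):
   for a toy rule

       thing: NAME (COMMA NAME)*

   metacompile() builds an NFA whose grouping and '*' are pure EMPTY
   arcs, makedfa() then collapses the epsilon closures into roughly

       0 --NAME--> 1(final) --COMMA--> 2 --NAME--> 1(final)

   and simplify() merges any duplicate subset states before convert()
   rewrites the result as the parser's arc tables. */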


@@ -0,0 +1,173 @@
/* Parser generator main program */
/* This expects a filename containing the grammar as argv[1] (UNIX)
or asks the console for such a file name (THINK C).
It writes its output on two files in the current directory:
- "graminit.c" gets the grammar as a bunch of initialized data
- "graminit.h" gets the grammar's non-terminals as #defines.
Error messages and status info during the generation process are
written to stdout, or sometimes to stderr. */
/* XXX TO DO:
- check for duplicate definitions of names (instead of fatal err)
*/
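/* Typical invocation (a sketch; the paths are illustrative only):

       pgen Grammar/Grammar graminit.h graminit.c

   reads the grammar file named first and writes the two generated C
   files named next, exactly as main() below requires. */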
#include "Python.h"
#include "pgenheaders.h"
#include "grammar.h"
#include "node.h"
#include "parsetok.h"
#include "pgen.h"
int Py_DebugFlag;
int Py_VerboseFlag;
int Py_IgnoreEnvironmentFlag;
/* Forward */
grammar *getgrammar(char *filename);
void
Py_Exit(int sts)
{
exit(sts);
}
int
main(int argc, char **argv)
{
grammar *g;
FILE *fp;
char *filename, *graminit_h, *graminit_c;
if (argc != 4) {
fprintf(stderr,
"usage: %s grammar graminit.h graminit.c\n", argv[0]);
Py_Exit(2);
}
filename = argv[1];
graminit_h = argv[2];
graminit_c = argv[3];
g = getgrammar(filename);
fp = fopen(graminit_c, "w");
if (fp == NULL) {
perror(graminit_c);
Py_Exit(1);
}
if (Py_DebugFlag)
printf("Writing %s ...\n", graminit_c);
printgrammar(g, fp);
fclose(fp);
fp = fopen(graminit_h, "w");
if (fp == NULL) {
perror(graminit_h);
Py_Exit(1);
}
if (Py_DebugFlag)
printf("Writing %s ...\n", graminit_h);
printnonterminals(g, fp);
fclose(fp);
Py_Exit(0);
return 0; /* Make gcc -Wall happy */
}
grammar *
getgrammar(char *filename)
{
FILE *fp;
node *n;
grammar *g0, *g;
perrdetail err;
fp = fopen(filename, "r");
if (fp == NULL) {
perror(filename);
Py_Exit(1);
}
g0 = meta_grammar();
n = PyParser_ParseFile(fp, filename, g0, g0->g_start,
(char *)NULL, (char *)NULL, &err);
fclose(fp);
if (n == NULL) {
fprintf(stderr, "Parsing error %d, line %d.\n",
err.error, err.lineno);
if (err.text != NULL) {
size_t i;
fprintf(stderr, "%s", err.text);
i = strlen(err.text);
if (i == 0 || err.text[i-1] != '\n')
fprintf(stderr, "\n");
for (i = 0; i < err.offset; i++) {
if (err.text[i] == '\t')
putc('\t', stderr);
else
putc(' ', stderr);
}
fprintf(stderr, "^\n");
PyObject_FREE(err.text);
}
Py_Exit(1);
}
g = pgen(n);
if (g == NULL) {
printf("Bad grammar.\n");
Py_Exit(1);
}
return g;
}
/* Can't happen in pgen */
PyObject*
PyErr_Occurred()
{
return 0;
}
void
Py_FatalError(const char *msg)
{
fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg);
Py_Exit(1);
}
/* No-nonsense my_readline() for tokenizer.c */
char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt)
{
size_t n = 1000;
char *p = (char *)PyMem_MALLOC(n);
char *q;
if (p == NULL)
return NULL;
fprintf(stderr, "%s", prompt);
q = fgets(p, n, sys_stdin);
if (q == NULL) {
*p = '\0';
return p;
}
n = strlen(p);
if (n > 0 && p[n-1] != '\n')
p[n-1] = '\n';
return (char *)PyMem_REALLOC(p, n+1);
}
/* No-nonsense fgets */
char *
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
{
return fgets(buf, n, stream);
}
#include <stdarg.h>
void
PySys_WriteStderr(const char *format, ...)
{
va_list va;
va_start(va, format);
vfprintf(stderr, format, va);
va_end(va);
}


@@ -0,0 +1,117 @@
/* Print a bunch of C initializers that represent a grammar */
#include "pgenheaders.h"
#include "grammar.h"
/* Forward */
static void printarcs(int, dfa *, FILE *);
static void printstates(grammar *, FILE *);
static void printdfas(grammar *, FILE *);
static void printlabels(grammar *, FILE *);
void
printgrammar(grammar *g, FILE *fp)
{
fprintf(fp, "/* Generated by Parser/pgen */\n\n");
fprintf(fp, "#include \"pgenheaders.h\"\n");
fprintf(fp, "#include \"grammar.h\"\n");
fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n");
printdfas(g, fp);
printlabels(g, fp);
fprintf(fp, "grammar _PyParser_Grammar = {\n");
fprintf(fp, " %d,\n", g->g_ndfas);
fprintf(fp, " dfas,\n");
fprintf(fp, " {%d, labels},\n", g->g_ll.ll_nlabels);
fprintf(fp, " %d\n", g->g_start);
fprintf(fp, "};\n");
}
void
printnonterminals(grammar *g, FILE *fp)
{
dfa *d;
int i;
fprintf(fp, "/* Generated by Parser/pgen */\n\n");
d = g->g_dfa;
for (i = g->g_ndfas; --i >= 0; d++)
fprintf(fp, "#define %s %d\n", d->d_name, d->d_type);
}
static void
printarcs(int i, dfa *d, FILE *fp)
{
arc *a;
state *s;
int j, k;
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++) {
fprintf(fp, "static arc arcs_%d_%d[%d] = {\n",
i, j, s->s_narcs);
a = s->s_arc;
for (k = 0; k < s->s_narcs; k++, a++)
fprintf(fp, " {%d, %d},\n", a->a_lbl, a->a_arrow);
fprintf(fp, "};\n");
}
}
static void
printstates(grammar *g, FILE *fp)
{
state *s;
dfa *d;
int i, j;
d = g->g_dfa;
for (i = 0; i < g->g_ndfas; i++, d++) {
printarcs(i, d, fp);
fprintf(fp, "static state states_%d[%d] = {\n",
i, d->d_nstates);
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++)
fprintf(fp, " {%d, arcs_%d_%d},\n",
s->s_narcs, i, j);
fprintf(fp, "};\n");
}
}
static void
printdfas(grammar *g, FILE *fp)
{
dfa *d;
int i, j;
printstates(g, fp);
fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas);
d = g->g_dfa;
for (i = 0; i < g->g_ndfas; i++, d++) {
fprintf(fp, " {%d, \"%s\", %d, %d, states_%d,\n",
d->d_type, d->d_name, d->d_initial, d->d_nstates, i);
fprintf(fp, " \"");
for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++)
fprintf(fp, "\\%03o", d->d_first[j] & 0xff);
fprintf(fp, "\"},\n");
}
fprintf(fp, "};\n");
}
static void
printlabels(grammar *g, FILE *fp)
{
label *l;
int i;
fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels);
l = g->g_ll.ll_label;
for (i = g->g_ll.ll_nlabels; --i >= 0; l++) {
if (l->lb_str == NULL)
fprintf(fp, " {%d, 0},\n", l->lb_type);
else
fprintf(fp, " {%d, \"%s\"},\n",
l->lb_type, l->lb_str);
}
fprintf(fp, "};\n");
}
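/* A hedged sketch of the output the functions above emit into
   graminit.c (all counts, names, and values are illustrative only):

       static arc arcs_0_0[3] = { {2, 1}, {3, 1}, {4, 1} };
       static state states_0[3] = { {3, arcs_0_0}, ... };
       static dfa dfas[86] = {
           {256, "single_input", 0, 3, states_0, "\004\050..."},
           ...
       };
       static label labels[168] = { {0, "EMPTY"}, {256, 0}, ... };
       grammar _PyParser_Grammar = { 86, dfas, {168, labels}, 256 };
*/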


@@ -0,0 +1,839 @@
# Copyright (c) 1998-2002 John Aycock
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
__version__ = 'SPARK-0.7 (pre-alpha-5)'
import re
import string
def _namelist(instance):
namelist, namedict, classlist = [], {}, [instance.__class__]
for c in classlist:
for b in c.__bases__:
classlist.append(b)
for name in c.__dict__.keys():
if not namedict.has_key(name):
namelist.append(name)
namedict[name] = 1
return namelist
class GenericScanner:
def __init__(self, flags=0):
pattern = self.reflect()
self.re = re.compile(pattern, re.VERBOSE|flags)
self.index2func = {}
for name, number in self.re.groupindex.items():
self.index2func[number-1] = getattr(self, 't_' + name)
def makeRE(self, name):
doc = getattr(self, name).__doc__
rv = '(?P<%s>%s)' % (name[2:], doc)
return rv
def reflect(self):
rv = []
for name in _namelist(self):
if name[:2] == 't_' and name != 't_default':
rv.append(self.makeRE(name))
rv.append(self.makeRE('t_default'))
return string.join(rv, '|')
def error(self, s, pos):
print "Lexical error at position %s" % pos
raise SystemExit
def tokenize(self, s):
pos = 0
n = len(s)
while pos < n:
m = self.re.match(s, pos)
if m is None:
self.error(s, pos)
groups = m.groups()
for i in range(len(groups)):
if groups[i] and self.index2func.has_key(i):
self.index2func[i](groups[i])
pos = m.end()
def t_default(self, s):
r'( . | \n )+'
print "Specification error: unmatched input"
raise SystemExit
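# A hedged usage sketch (not part of SPARK itself): GenericScanner finds
# every t_* method via reflection and uses its docstring as the regex,
# so a concrete scanner looks like this.  NumberScanner is hypothetical.
class NumberScanner(GenericScanner):
    def __init__(self):
        GenericScanner.__init__(self)
        self.rv = []
    def t_number(self, s):
        r' \d+ '
        self.rv.append(('number', s))
    def t_whitespace(self, s):
        r' \s+ '
        pass  # skip blanks silently
# NumberScanner().tokenize('12 34') would call t_number twice and
# collect two 'number' tokens in rv.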
#
# Extracted from GenericParser and made global so that [un]pickling works.
#
class _State:
def __init__(self, stateno, items):
self.T, self.complete, self.items = [], [], items
self.stateno = stateno
class GenericParser:
#
# An Earley parser, as per J. Earley, "An Efficient Context-Free
# Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley,
# "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
# Carnegie-Mellon University, August 1968. New formulation of
# the parser according to J. Aycock, "Practical Earley Parsing
# and the SPARK Toolkit", Ph.D. thesis, University of Victoria,
# 2001, and J. Aycock and R. N. Horspool, "Practical Earley
# Parsing", unpublished paper, 2001.
#
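    # (A hedged usage sketch of a concrete parser follows this class
    # definition below.)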
def __init__(self, start):
self.rules = {}
self.rule2func = {}
self.rule2name = {}
self.collectRules()
self.augment(start)
self.ruleschanged = 1
_NULLABLE = '\e_'
_START = 'START'
_BOF = '|-'
#
# When pickling, take the time to generate the full state machine;
# some information is then extraneous, too. Unfortunately we
# can't save the rule2func map.
#
def __getstate__(self):
if self.ruleschanged:
#
# XXX - duplicated from parse()
#
self.computeNull()
self.newrules = {}
self.new2old = {}
self.makeNewRules()
self.ruleschanged = 0
self.edges, self.cores = {}, {}
self.states = { 0: self.makeState0() }
self.makeState(0, self._BOF)
#
# XXX - should find a better way to do this..
#
changes = 1
while changes:
changes = 0
for k, v in self.edges.items():
if v is None:
state, sym = k
if self.states.has_key(state):
self.goto(state, sym)
changes = 1
rv = self.__dict__.copy()
for s in self.states.values():
del s.items
del rv['rule2func']
del rv['nullable']
del rv['cores']
return rv
def __setstate__(self, D):
self.rules = {}
self.rule2func = {}
self.rule2name = {}
self.collectRules()
start = D['rules'][self._START][0][1][1] # Blech.
self.augment(start)
D['rule2func'] = self.rule2func
D['makeSet'] = self.makeSet_fast
self.__dict__ = D
#
# A hook for GenericASTBuilder and GenericASTMatcher. Mess
# thee not with this; nor shall thee toucheth the _preprocess
# argument to addRule.
#
def preprocess(self, rule, func): return rule, func
def addRule(self, doc, func, _preprocess=1):
fn = func
rules = string.split(doc)
index = []
for i in range(len(rules)):
if rules[i] == '::=':
index.append(i-1)
index.append(len(rules))
for i in range(len(index)-1):
lhs = rules[index[i]]
rhs = rules[index[i]+2:index[i+1]]
rule = (lhs, tuple(rhs))
if _preprocess:
rule, fn = self.preprocess(rule, func)
if self.rules.has_key(lhs):
self.rules[lhs].append(rule)
else:
self.rules[lhs] = [ rule ]
self.rule2func[rule] = fn
self.rule2name[rule] = func.__name__[2:]
self.ruleschanged = 1
def collectRules(self):
for name in _namelist(self):
if name[:2] == 'p_':
func = getattr(self, name)
doc = func.__doc__
self.addRule(doc, func)
def augment(self, start):
rule = '%s ::= %s %s' % (self._START, self._BOF, start)
self.addRule(rule, lambda args: args[1], 0)
def computeNull(self):
self.nullable = {}
tbd = []
for rulelist in self.rules.values():
lhs = rulelist[0][0]
self.nullable[lhs] = 0
for rule in rulelist:
rhs = rule[1]
if len(rhs) == 0:
self.nullable[lhs] = 1
continue
#
# We only need to consider rules which
# consist entirely of nonterminal symbols.
# This should be a savings on typical
# grammars.
#
for sym in rhs:
if not self.rules.has_key(sym):
break
else:
tbd.append(rule)
changes = 1
while changes:
changes = 0
for lhs, rhs in tbd:
if self.nullable[lhs]:
continue
for sym in rhs:
if not self.nullable[sym]:
break
else:
self.nullable[lhs] = 1
changes = 1
def makeState0(self):
s0 = _State(0, [])
for rule in self.newrules[self._START]:
s0.items.append((rule, 0))
return s0
def finalState(self, tokens):
#
# Yuck.
#
if len(self.newrules[self._START]) == 2 and len(tokens) == 0:
return 1
start = self.rules[self._START][0][1][1]
return self.goto(1, start)
def makeNewRules(self):
worklist = []
for rulelist in self.rules.values():
for rule in rulelist:
worklist.append((rule, 0, 1, rule))
for rule, i, candidate, oldrule in worklist:
lhs, rhs = rule
n = len(rhs)
while i < n:
sym = rhs[i]
if not self.rules.has_key(sym) or \
not self.nullable[sym]:
candidate = 0
i = i + 1
continue
newrhs = list(rhs)
newrhs[i] = self._NULLABLE+sym
newrule = (lhs, tuple(newrhs))
worklist.append((newrule, i+1,
candidate, oldrule))
candidate = 0
i = i + 1
else:
if candidate:
lhs = self._NULLABLE+lhs
rule = (lhs, rhs)
if self.newrules.has_key(lhs):
self.newrules[lhs].append(rule)
else:
self.newrules[lhs] = [ rule ]
self.new2old[rule] = oldrule
def typestring(self, token):
return None
def error(self, token):
print "Syntax error at or near `%s' token" % token
raise SystemExit
def parse(self, tokens):
sets = [ [(1,0), (2,0)] ]
self.links = {}
if self.ruleschanged:
self.computeNull()
self.newrules = {}
self.new2old = {}
self.makeNewRules()
self.ruleschanged = 0
self.edges, self.cores = {}, {}
self.states = { 0: self.makeState0() }
self.makeState(0, self._BOF)
for i in xrange(len(tokens)):
sets.append([])
if sets[i] == []:
break
self.makeSet(tokens[i], sets, i)
else:
sets.append([])
self.makeSet(None, sets, len(tokens))
#_dump(tokens, sets, self.states)
finalitem = (self.finalState(tokens), 0)
if finalitem not in sets[-2]:
if len(tokens) > 0:
self.error(tokens[i-1])
else:
self.error(None)
return self.buildTree(self._START, finalitem,
tokens, len(sets)-2)
def isnullable(self, sym):
#
# For symbols in G_e only. If we weren't supporting 1.5,
# could just use sym.startswith().
#
return self._NULLABLE == sym[0:len(self._NULLABLE)]
def skip(self, (lhs, rhs), pos=0):
n = len(rhs)
while pos < n:
if not self.isnullable(rhs[pos]):
break
pos = pos + 1
return pos
def makeState(self, state, sym):
assert sym is not None
#
# Compute \epsilon-kernel state's core and see if
# it exists already.
#
kitems = []
for rule, pos in self.states[state].items:
lhs, rhs = rule
if rhs[pos:pos+1] == (sym,):
kitems.append((rule, self.skip(rule, pos+1)))
core = kitems
core.sort()
tcore = tuple(core)
if self.cores.has_key(tcore):
return self.cores[tcore]
#
# Nope, doesn't exist. Compute it and the associated
# \epsilon-nonkernel state together; we'll need it right away.
#
k = self.cores[tcore] = len(self.states)
K, NK = _State(k, kitems), _State(k+1, [])
self.states[k] = K
predicted = {}
edges = self.edges
rules = self.newrules
for X in K, NK:
worklist = X.items
for item in worklist:
rule, pos = item
lhs, rhs = rule
if pos == len(rhs):
X.complete.append(rule)
continue
nextSym = rhs[pos]
key = (X.stateno, nextSym)
if not rules.has_key(nextSym):
if not edges.has_key(key):
edges[key] = None
X.T.append(nextSym)
else:
edges[key] = None
if not predicted.has_key(nextSym):
predicted[nextSym] = 1
for prule in rules[nextSym]:
ppos = self.skip(prule)
new = (prule, ppos)
NK.items.append(new)
#
# Problem: we know K needs generating, but we
# don't yet know about NK. Can't commit anything
# regarding NK to self.edges until we're sure. Should
# we delay committing on both K and NK to avoid this
# hacky code? This creates other problems..
#
if X is K:
edges = {}
if NK.items == []:
return k
#
# Check for \epsilon-nonkernel's core. Unfortunately we
# need to know the entire set of predicted nonterminals
# to do this without accidentally duplicating states.
#
core = predicted.keys()
core.sort()
tcore = tuple(core)
if self.cores.has_key(tcore):
self.edges[(k, None)] = self.cores[tcore]
return k
nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno
self.edges.update(edges)
self.states[nk] = NK
return k
def goto(self, state, sym):
key = (state, sym)
if not self.edges.has_key(key):
#
# No transitions from state on sym.
#
return None
rv = self.edges[key]
if rv is None:
#
# Target state isn't generated yet. Remedy this.
#
rv = self.makeState(state, sym)
self.edges[key] = rv
return rv
def gotoT(self, state, t):
return [self.goto(state, t)]
def gotoST(self, state, st):
rv = []
for t in self.states[state].T:
if st == t:
rv.append(self.goto(state, t))
return rv
def add(self, set, item, i=None, predecessor=None, causal=None):
if predecessor is None:
if item not in set:
set.append(item)
else:
key = (item, i)
if item not in set:
self.links[key] = []
set.append(item)
self.links[key].append((predecessor, causal))
def makeSet(self, token, sets, i):
cur, next = sets[i], sets[i+1]
ttype = token is not None and self.typestring(token) or None
if ttype is not None:
fn, arg = self.gotoT, ttype
else:
fn, arg = self.gotoST, token
for item in cur:
ptr = (item, i)
state, parent = item
add = fn(state, arg)
for k in add:
if k is not None:
self.add(next, (k, parent), i+1, ptr)
nk = self.goto(k, None)
if nk is not None:
self.add(next, (nk, i+1))
if parent == i:
continue
for rule in self.states[state].complete:
lhs, rhs = rule
for pitem in sets[parent]:
pstate, pparent = pitem
k = self.goto(pstate, lhs)
if k is not None:
why = (item, i, rule)
pptr = (pitem, parent)
self.add(cur, (k, pparent),
i, pptr, why)
nk = self.goto(k, None)
if nk is not None:
self.add(cur, (nk, i))
def makeSet_fast(self, token, sets, i):
#
# Call *only* when the entire state machine has been built!
# It relies on self.edges being filled in completely, and
# then duplicates and inlines code to boost speed at the
# cost of extreme ugliness.
#
cur, next = sets[i], sets[i+1]
ttype = token is not None and self.typestring(token) or None
for item in cur:
ptr = (item, i)
state, parent = item
if ttype is not None:
k = self.edges.get((state, ttype), None)
if k is not None:
#self.add(next, (k, parent), i+1, ptr)
#INLINED --v
new = (k, parent)
key = (new, i+1)
if new not in next:
self.links[key] = []
next.append(new)
self.links[key].append((ptr, None))
#INLINED --^
#nk = self.goto(k, None)
nk = self.edges.get((k, None), None)
if nk is not None:
#self.add(next, (nk, i+1))
#INLINED --v
new = (nk, i+1)
if new not in next:
next.append(new)
#INLINED --^
else:
add = self.gotoST(state, token)
for k in add:
if k is not None:
self.add(next, (k, parent), i+1, ptr)
#nk = self.goto(k, None)
nk = self.edges.get((k, None), None)
if nk is not None:
self.add(next, (nk, i+1))
if parent == i:
continue
for rule in self.states[state].complete:
lhs, rhs = rule
for pitem in sets[parent]:
pstate, pparent = pitem
#k = self.goto(pstate, lhs)
k = self.edges.get((pstate, lhs), None)
if k is not None:
why = (item, i, rule)
pptr = (pitem, parent)
#self.add(cur, (k, pparent),
# i, pptr, why)
#INLINED --v
new = (k, pparent)
key = (new, i)
if new not in cur:
self.links[key] = []
cur.append(new)
self.links[key].append((pptr, why))
#INLINED --^
#nk = self.goto(k, None)
nk = self.edges.get((k, None), None)
if nk is not None:
#self.add(cur, (nk, i))
#INLINED --v
new = (nk, i)
if new not in cur:
cur.append(new)
#INLINED --^
def predecessor(self, key, causal):
for p, c in self.links[key]:
if c == causal:
return p
assert 0
def causal(self, key):
links = self.links[key]
if len(links) == 1:
return links[0][1]
choices = []
rule2cause = {}
for p, c in links:
rule = c[2]
choices.append(rule)
rule2cause[rule] = c
return rule2cause[self.ambiguity(choices)]
def deriveEpsilon(self, nt):
if len(self.newrules[nt]) > 1:
rule = self.ambiguity(self.newrules[nt])
else:
rule = self.newrules[nt][0]
#print rule
rhs = rule[1]
attr = [None] * len(rhs)
for i in range(len(rhs)-1, -1, -1):
attr[i] = self.deriveEpsilon(rhs[i])
return self.rule2func[self.new2old[rule]](attr)
def buildTree(self, nt, item, tokens, k):
state, parent = item
choices = []
for rule in self.states[state].complete:
if rule[0] == nt:
choices.append(rule)
rule = choices[0]
if len(choices) > 1:
rule = self.ambiguity(choices)
#print rule
rhs = rule[1]
attr = [None] * len(rhs)
for i in range(len(rhs)-1, -1, -1):
sym = rhs[i]
if not self.newrules.has_key(sym):
if sym != self._BOF:
attr[i] = tokens[k-1]
key = (item, k)
item, k = self.predecessor(key, None)
#elif self.isnullable(sym):
elif self._NULLABLE == sym[0:len(self._NULLABLE)]:
attr[i] = self.deriveEpsilon(sym)
else:
key = (item, k)
why = self.causal(key)
attr[i] = self.buildTree(sym, why[0],
tokens, why[1])
item, k = self.predecessor(key, why)
return self.rule2func[self.new2old[rule]](attr)
def ambiguity(self, rules):
#
# XXX - problem here and in collectRules() if the same rule
# appears in >1 method. Also undefined results if rules
# causing the ambiguity appear in the same method.
#
sortlist = []
name2index = {}
for i in range(len(rules)):
lhs, rhs = rule = rules[i]
name = self.rule2name[self.new2old[rule]]
sortlist.append((len(rhs), name))
name2index[name] = i
sortlist.sort()
list = map(lambda (a,b): b, sortlist)
return rules[name2index[self.resolve(list)]]
def resolve(self, list):
#
# Resolve ambiguity in favor of the shortest RHS.
# Since we walk the tree from the top down, this
# should effectively resolve in favor of a "shift".
#
return list[0]
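# A hedged usage sketch (not part of SPARK): grammar rules live in the
# docstrings of p_* methods; with plain string tokens, typestring()
# returns None and terminals are matched by equality.  CountParser is
# hypothetical.
class CountParser(GenericParser):
    def __init__(self):
        GenericParser.__init__(self, 'items')
    def p_items_many(self, args):
        ' items ::= items item '
        return args[0] + 1
    def p_items_one(self, args):
        ' items ::= item '
        return 1
# CountParser().parse(['item', 'item', 'item']) would return 3.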
#
# GenericASTBuilder automagically constructs a concrete/abstract syntax tree
# for a given input. The extra argument is a class (not an instance!)
# which supports the "__setslice__" and "__len__" methods.
#
# XXX - silently overrides any user code in methods.
#
class GenericASTBuilder(GenericParser):
def __init__(self, AST, start):
GenericParser.__init__(self, start)
self.AST = AST
def preprocess(self, rule, func):
rebind = lambda lhs, self=self: \
lambda args, lhs=lhs, self=self: \
self.buildASTNode(args, lhs)
lhs, rhs = rule
return rule, rebind(lhs)
def buildASTNode(self, args, lhs):
children = []
for arg in args:
if isinstance(arg, self.AST):
children.append(arg)
else:
children.append(self.terminal(arg))
return self.nonterminal(lhs, children)
def terminal(self, token): return token
def nonterminal(self, type, args):
rv = self.AST(type)
rv[:len(args)] = args
return rv
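# A hedged sketch of an AST class suitable for GenericASTBuilder: the
# builder only needs a class (not an instance) supporting __setslice__
# and __len__; 'type' is what typestring() reads during traversal.
# ASTNode is hypothetical.
class ASTNode:
    def __init__(self, type):
        self.type = type
        self._kids = []
    def __getitem__(self, i):
        return self._kids[i]
    def __len__(self):
        return len(self._kids)
    def __setslice__(self, low, high, seq):
        self._kids[low:high] = list(seq)
# GenericASTBuilder(ASTNode, 'expr') would then build ASTNode trees.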
#
# GenericASTTraversal is a Visitor pattern according to Design Patterns. For
# each node it attempts to invoke the method n_<node type>, falling
# back onto the default() method if the n_* can't be found. The preorder
# traversal also looks for an exit hook named n_<node type>_exit (no default
# routine is called if it's not found). To prematurely halt traversal
# of a subtree, call the prune() method -- this only makes sense for a
# preorder traversal. Node type is determined via the typestring() method.
#
class GenericASTTraversalPruningException:
pass
class GenericASTTraversal:
def __init__(self, ast):
self.ast = ast
def typestring(self, node):
return node.type
def prune(self):
raise GenericASTTraversalPruningException
def preorder(self, node=None):
if node is None:
node = self.ast
try:
name = 'n_' + self.typestring(node)
if hasattr(self, name):
func = getattr(self, name)
func(node)
else:
self.default(node)
except GenericASTTraversalPruningException:
return
for kid in node:
self.preorder(kid)
name = name + '_exit'
if hasattr(self, name):
func = getattr(self, name)
func(node)
def postorder(self, node=None):
if node is None:
node = self.ast
for kid in node:
self.postorder(kid)
name = 'n_' + self.typestring(node)
if hasattr(self, name):
func = getattr(self, name)
func(node)
else:
self.default(node)
def default(self, node):
pass
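# A hedged usage sketch: preorder() dispatches on 'n_' + typestring(node)
# and falls back to default() elsewhere, so a visitor that collects every
# 'number' node looks like this.  NumberCollector is hypothetical.
class NumberCollector(GenericASTTraversal):
    def __init__(self, ast):
        GenericASTTraversal.__init__(self, ast)
        self.numbers = []
        self.preorder()
    def n_number(self, node):
        self.numbers.append(node)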
#
# GenericASTMatcher. AST nodes must have "__getitem__" and "__cmp__"
# implemented.
#
# XXX - makes assumptions about how GenericParser walks the parse tree.
#
class GenericASTMatcher(GenericParser):
def __init__(self, start, ast):
GenericParser.__init__(self, start)
self.ast = ast
def preprocess(self, rule, func):
rebind = lambda func, self=self: \
lambda args, func=func, self=self: \
self.foundMatch(args, func)
lhs, rhs = rule
rhslist = list(rhs)
rhslist.reverse()
return (lhs, tuple(rhslist)), rebind(func)
def foundMatch(self, args, func):
func(args[-1])
return args[-1]
def match_r(self, node):
self.input.insert(0, node)
children = 0
for child in node:
if children == 0:
self.input.insert(0, '(')
children = children + 1
self.match_r(child)
if children > 0:
self.input.insert(0, ')')
def match(self, ast=None):
if ast is None:
ast = self.ast
self.input = []
self.match_r(ast)
self.parse(self.input)
def resolve(self, list):
#
# Resolve ambiguity in favor of the longest RHS.
#
return list[-1]
def _dump(tokens, sets, states):
for i in range(len(sets)):
print 'set', i
for item in sets[i]:
print '\t', item
for (lhs, rhs), pos in states[item[0]].items:
print '\t\t', lhs, '::=',
print string.join(rhs[:pos]),
print '.',
print string.join(rhs[pos:])
if i < len(tokens):
print
print 'token', str(tokens[i])
print

File diff suppressed because it is too large


@@ -0,0 +1,70 @@
#ifndef Py_TOKENIZER_H
#define Py_TOKENIZER_H
#ifdef __cplusplus
extern "C" {
#endif
#include "object.h"
/* Tokenizer interface */
#include "token.h" /* For token types */
#define MAXINDENT 100 /* Max indentation level */
/* Tokenizer state */
struct tok_state {
/* Input state; buf <= cur <= inp <= end */
/* NB an entire line is held in the buffer */
char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
char *cur; /* Next character in buffer */
char *inp; /* End of data in buffer */
char *end; /* End of input buffer if buf != NULL */
char *start; /* Start of current token if not NULL */
int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
/* NB If done != E_OK, cur must be == inp!!! */
FILE *fp; /* Rest of input; NULL if tokenizing a string */
int tabsize; /* Tab spacing */
int indent; /* Current indentation index */
int indstack[MAXINDENT]; /* Stack of indents */
int atbol; /* Nonzero if at begin of new line */
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
char *prompt, *nextprompt; /* For interactive prompting */
int lineno; /* Current line number */
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
const char *filename; /* For error messages */
int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */
int altindstack[MAXINDENT]; /* Stack of alternate indents */
/* Stuff for PEP 0263 */
int decoding_state; /* -1:decoding, 0:init, 1:raw */
int decoding_erred; /* whether erred in decoding */
int read_coding_spec; /* whether 'coding:...' has been read */
char *encoding;
int cont_line; /* whether we are in a continuation line. */
const char* line_start; /* pointer to start of current line */
#ifndef PGEN
PyObject *decoding_readline; /* codecs.open(...).readline */
PyObject *decoding_buffer;
#endif
const char* enc;
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);
extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
extern void PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
#if defined(PGEN) || defined(Py_USING_UNICODE)
extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
int len, int *offset);
#endif
#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKENIZER_H */
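/* A hedged sketch (not part of the original header) of the driving loop
   a consumer like parsetok.c uses: PyTokenizer_Get() returns the token
   type and points a and b at the token's bounds inside the buffer.
   dump_tokens is a hypothetical helper. */
#include <stdio.h>
static void
dump_tokens(const char *src)
{
    struct tok_state *tok = PyTokenizer_FromString(src, 0);
    char *a, *b;
    int type;
    if (tok == NULL)
        return;
    while ((type = PyTokenizer_Get(tok, &a, &b)) != ENDMARKER &&
           type != ERRORTOKEN)
        printf("token %d: %.*s\n", type, (int)(b - a), a);
    PyTokenizer_Free(tok);
}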


@@ -0,0 +1,2 @@
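/* Build the parser generator's private copy of the tokenizer: defining
   PGEN before textually including tokenizer.c drops the Python-object
   and codec machinery guarded by #ifndef PGEN (see tokenizer.h above). */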
#define PGEN
#include "tokenizer.c"