AppPkg/Applications/Python/Python-2.7.10: Initial Checkin part 1/5.

The Include, Parser, and Python directories from the cPython 2.7.10 distribution.
These files are unchanged and set the baseline for subsequent commits.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Daryl McDaniel <edk2-lists@mc2research.org>


git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@18737 6f19259b-4bc3-4df7-8a09-765794883524
This commit is contained in:
Daryl McDaniel
2015-11-07 19:19:19 +00:00
committed by darylm503
parent 3535213aae
commit c8042e1076
145 changed files with 62620 additions and 0 deletions

View File

@ -0,0 +1,125 @@
/* Parser accelerator module */
/* The parser as originally conceived had disappointing performance.
This module does some precomputation that speeds up the selection
of a DFA based upon a token, turning a search through an array
into a simple indexing operation. The parser now cannot work
without the accelerators installed. Note that the accelerators
are installed dynamically when the parser is initialized, they
are not part of the static data structure written on graminit.[ch]
by the parser generator. */
#include "pgenheaders.h"
#include "grammar.h"
#include "node.h"
#include "token.h"
#include "parser.h"
/* Forward references */
static void fixdfa(grammar *, dfa *);
static void fixstate(grammar *, state *);
/* Build the accelerator tables for every DFA of grammar `g`, then mark the
   grammar as accelerated (g_accel = 1). */
void
PyGrammar_AddAccelerators(grammar *g)
{
    int idx;

    for (idx = 0; idx < g->g_ndfas; idx++)
        fixdfa(g, &g->g_dfa[idx]);
    g->g_accel = 1;
}
/* Undo PyGrammar_AddAccelerators(): clear the g_accel flag and release the
   accelerator array of every state of every DFA, leaving s_accel == NULL. */
void
PyGrammar_RemoveAccelerators(grammar *g)
{
    int di;

    g->g_accel = 0;
    for (di = 0; di < g->g_ndfas; di++) {
        dfa *cur = &g->g_dfa[di];
        int si;

        for (si = 0; si < cur->d_nstates; si++) {
            state *st = &cur->d_state[si];

            if (st->s_accel != NULL)
                PyObject_FREE(st->s_accel);
            st->s_accel = NULL;
        }
    }
}
/* Compute the accelerator table of each state belonging to DFA `d`. */
static void
fixdfa(grammar *g, dfa *d)
{
    int remaining = d->d_nstates;
    state *cur = d->d_state;

    while (remaining-- > 0)
        fixstate(g, cur++);
}
/* Build the accelerator table for a single state `s`.

   A scratch table with one slot per grammar label is filled from the arcs
   leaving the state (-1 means "no transition on this label"):
     - arc on a terminal label:      slot = target state number;
     - arc on a non-terminal label:  for every label in that non-terminal's
       FIRST set, slot = target state | (1 << 7) | (nonterminal index << 8);
     - arc on the EMPTY label:       the state is flagged as accepting.
   The used sub-range [s_lower, s_upper) of the scratch table is then copied
   into a freshly allocated s->s_accel.  Out of memory terminates the
   process with exit(1). */
static void
fixstate(grammar *g, state *s)
{
    int nlabels = g->g_ll.ll_nlabels;
    int *table;
    int idx;
    int lower;
    int upper;

    s->s_accept = 0;
    table = (int *) PyObject_MALLOC(nlabels * sizeof(int));
    if (table == NULL) {
        fprintf(stderr, "no mem to build parser accelerators\n");
        exit(1);
    }
    for (idx = 0; idx < nlabels; idx++)
        table[idx] = -1;

    for (idx = 0; idx < s->s_narcs; idx++) {
        arc *a = &s->s_arc[idx];
        int lbl = a->a_lbl;
        int type = g->g_ll.ll_label[lbl].lb_type;

        /* The target state must fit in the low 7 bits of an entry. */
        if (a->a_arrow >= (1 << 7)) {
            printf("XXX too many states!\n");
            continue;
        }
        if (ISNONTERMINAL(type)) {
            dfa *target = PyGrammar_FindDFA(g, type);
            int bit;

            if (type - NT_OFFSET >= (1 << 7)) {
                printf("XXX too high nonterminal number!\n");
                continue;
            }
            for (bit = 0; bit < g->g_ll.ll_nlabels; bit++) {
                if (!testbit(target->d_first, bit))
                    continue;
                if (table[bit] != -1)
                    printf("XXX ambiguity!\n");
                table[bit] = a->a_arrow | (1 << 7) |
                             ((type - NT_OFFSET) << 8);
            }
        }
        else if (lbl == EMPTY) {
            s->s_accept = 1;
        }
        else if (lbl >= 0 && lbl < nlabels) {
            table[lbl] = a->a_arrow;
        }
    }

    /* Strip unused slots from both ends so only the live range is stored. */
    upper = nlabels;
    while (upper > 0 && table[upper - 1] == -1)
        upper--;
    lower = 0;
    while (lower < upper && table[lower] == -1)
        lower++;

    if (lower < upper) {
        int span = upper - lower;
        int j;

        s->s_accel = (int *) PyObject_MALLOC(span * sizeof(int));
        if (s->s_accel == NULL) {
            fprintf(stderr, "no mem to add parser accelerators\n");
            exit(1);
        }
        s->s_lower = lower;
        s->s_upper = upper;
        for (j = 0; j < span; j++)
            s->s_accel[j] = table[lower + j];
    }
    PyObject_FREE(table);
}

View File

@ -0,0 +1,66 @@
/* Bitset primitives used by the parser generator */
#include "pgenheaders.h"
#include "bitset.h"
bitset
newbitset(int nbits)
{
int nbytes = NBYTES(nbits);
bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes);
if (ss == NULL)
Py_FatalError("no mem for bitset");
ss += nbytes;
while (--nbytes >= 0)
*--ss = 0;
return ss;
}
/* Release a bitset; the pointer is handed straight to PyObject_FREE(). */
void
delbitset(bitset ss)
{
PyObject_FREE(ss);
}
/* Set bit `ibit` in `ss`.
   Returns 1 if the bit was newly set, 0 if it was already set. */
int
addbit(bitset ss, int ibit)
{
    int ibyte = BIT2BYTE(ibit);
    BYTE mask = BIT2MASK(ibit);
    int newly_set = (ss[ibyte] & mask) ? 0 : 1;

    if (newly_set)
        ss[ibyte] |= mask;
    return newly_set;
}
#if 0 /* Now a macro */
/* Function form of testbit(), excluded from compilation by the #if 0 above:
   yields nonzero when bit `ibit` of `ss` is set. */
int
testbit(bitset ss, int ibit)
{
return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0;
}
#endif
/* Compare the first NBYTES(nbits) bytes of two bitsets.
   Returns 1 when they are all equal, 0 at the first difference. */
int
samebitset(bitset ss1, bitset ss2, int nbits)
{
    int nbytes = NBYTES(nbits);
    int i;

    for (i = 0; i < nbytes; i++) {
        if (ss1[i] != ss2[i])
            return 0;
    }
    return 1;
}
/* OR the first NBYTES(nbits) bytes of `ss2` into `ss1` (in-place union). */
void
mergebitset(bitset ss1, bitset ss2, int nbits)
{
    int nbytes = NBYTES(nbits);
    int i;

    for (i = 0; i < nbytes; i++)
        ss1[i] |= ss2[i];
}

View File

@ -0,0 +1,113 @@
/* Computation of FIRST sets */
#include "pgenheaders.h"
#include "grammar.h"
#include "token.h"
extern int Py_DebugFlag;
/* Forward */
static void calcfirstset(grammar *, dfa *);
/* Compute the FIRST set of every DFA in `g` that does not have one yet
   (d_first == NULL).  Prints a progress line when Py_DebugFlag is set. */
void
addfirstsets(grammar *g)
{
    int idx = 0;

    if (Py_DebugFlag)
        printf("Adding FIRST sets ...\n");

    while (idx < g->g_ndfas) {
        dfa *cur = &g->g_dfa[idx++];

        if (cur->d_first != NULL)
            continue;
        calcfirstset(g, cur);
    }
}
/* Compute the FIRST set of DFA `d` and store it in d->d_first.

   The set is a bitset with one bit per grammar label.  It is built from the
   arcs leaving the DFA's initial state: a terminal label contributes its own
   bit, a non-terminal label contributes the FIRST set of the DFA it names
   (computed recursively on demand).

   While a DFA is being processed its d_first points at a shared static
   placeholder (`dummy`); meeting that placeholder again means the grammar is
   left-recursive, which is reported on stderr. */
static void
calcfirstset(grammar *g, dfa *d)
{
int i, j;
state *s;
arc *a;
int nsyms;
int *sym;
int nbits;
static bitset dummy;
bitset result;
int type;
dfa *d1;
label *l0;
if (Py_DebugFlag)
printf("Calculate FIRST set for '%s'\n", d->d_name);
/* The placeholder is allocated once, on first use. */
if (dummy == NULL)
dummy = newbitset(1);
/* Re-entered for a DFA that is still being computed further up the
   call chain. */
if (d->d_first == dummy) {
fprintf(stderr, "Left-recursion for '%s'\n", d->d_name);
return;
}
if (d->d_first != NULL) {
fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n",
d->d_name);
}
/* Mark this DFA as "in progress" before any recursion can happen. */
d->d_first = dummy;
l0 = g->g_ll.ll_label;
nbits = g->g_ll.ll_nlabels;
result = newbitset(nbits);
/* `sym` lists the labels already handled; it starts with the label found
   for this DFA's own type, so an arc carrying that label is skipped. */
sym = (int *)PyObject_MALLOC(sizeof(int));
if (sym == NULL)
Py_FatalError("no mem for new sym in calcfirstset");
nsyms = 1;
sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL);
s = &d->d_state[d->d_initial];
for (i = 0; i < s->s_narcs; i++) {
a = &s->s_arc[i];
/* Linear search: has this arc's label been handled already? */
for (j = 0; j < nsyms; j++) {
if (sym[j] == a->a_lbl)
break;
}
if (j >= nsyms) { /* New label */
sym = (int *)PyObject_REALLOC(sym,
sizeof(int) * (nsyms + 1));
if (sym == NULL)
Py_FatalError(
"no mem to resize sym in calcfirstset");
sym[nsyms++] = a->a_lbl;
type = l0[a->a_lbl].lb_type;
if (ISNONTERMINAL(type)) {
d1 = PyGrammar_FindDFA(g, type);
if (d1->d_first == dummy) {
fprintf(stderr,
"Left-recursion below '%s'\n",
d->d_name);
}
else {
/* Compute the sub-DFA's FIRST set if needed, then fold it in. */
if (d1->d_first == NULL)
calcfirstset(g, d1);
mergebitset(result,
d1->d_first, nbits);
}
}
else if (ISTERMINAL(type)) {
addbit(result, a->a_lbl);
}
}
}
/* Replace the placeholder with the finished set. */
d->d_first = result;
if (Py_DebugFlag) {
printf("FIRST set for '%s': {", d->d_name);
for (i = 0; i < nbits; i++) {
if (testbit(result, i))
printf(" %s", PyGrammar_LabelRepr(&l0[i]));
}
printf(" }\n");
}
PyObject_FREE(sym);
}

View File

@ -0,0 +1,254 @@
/* Grammar implementation */
#include "Python.h"
#include "pgenheaders.h"
#include <ctype.h>
#include "token.h"
#include "grammar.h"
#ifdef RISCOS
#include <unixlib.h>
#endif
extern int Py_DebugFlag;
/* Allocate an empty grammar (no DFAs, no labels, no accelerators) whose
   start symbol is `start`.  Calls Py_FatalError() when out of memory. */
grammar *
newgrammar(int start)
{
    grammar *fresh = (grammar *)PyObject_MALLOC(sizeof(grammar));

    if (fresh == NULL)
        Py_FatalError("no mem for new grammar");

    fresh->g_start = start;
    fresh->g_accel = 0;
    fresh->g_ndfas = 0;
    fresh->g_dfa = NULL;
    fresh->g_ll.ll_nlabels = 0;
    fresh->g_ll.ll_label = NULL;
    return fresh;
}
/* Append a new, empty DFA of the given type and name to grammar `g`.

   The name is copied with strdup().  Returns a pointer to the new entry
   inside g->g_dfa; because that array is reallocated on every call, the
   pointer is only valid until the next adddfa() on the same grammar.
   Any allocation failure is fatal (Py_FatalError). */
dfa *
adddfa(grammar *g, int type, char *name)
{
    dfa *d;

    g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa,
                                        sizeof(dfa) * (g->g_ndfas + 1));
    if (g->g_dfa == NULL)
        Py_FatalError("no mem to resize dfa in adddfa");
    d = &g->g_dfa[g->g_ndfas++];
    d->d_type = type;
    d->d_name = strdup(name);
    /* A NULL name would later be passed to strcmp() when labels are
       translated, so treat a failed copy like every other OOM here. */
    if (d->d_name == NULL)
        Py_FatalError("no mem for dfa name in adddfa");
    d->d_nstates = 0;
    d->d_state = NULL;
    d->d_initial = -1;
    d->d_first = NULL;
    return d; /* Only use while fresh! */
}
/* Append a new state (no arcs, no accelerators, not accepting) to DFA `d`
   and return its index.  Calls Py_FatalError() when out of memory. */
int
addstate(dfa *d)
{
    int index;
    state *fresh;

    d->d_state = (state *)PyObject_REALLOC(d->d_state,
                                            sizeof(state) * (d->d_nstates + 1));
    if (d->d_state == NULL)
        Py_FatalError("no mem to resize state in addstate");

    index = d->d_nstates++;
    fresh = &d->d_state[index];
    fresh->s_accept = 0;
    fresh->s_accel = NULL;
    fresh->s_lower = 0;
    fresh->s_upper = 0;
    fresh->s_narcs = 0;
    fresh->s_arc = NULL;
    return index;
}
/* Add an arc labelled `lbl` from state `from` to state `to` of DFA `d`.
   Both state numbers must already exist (checked with assert).
   Calls Py_FatalError() when out of memory. */
void
addarc(dfa *d, int from, int to, int lbl)
{
    state *src;
    arc *slot;

    assert(0 <= from && from < d->d_nstates);
    assert(0 <= to && to < d->d_nstates);

    src = &d->d_state[from];
    src->s_arc = (arc *)PyObject_REALLOC(src->s_arc,
                                          sizeof(arc) * (src->s_narcs + 1));
    if (src->s_arc == NULL)
        Py_FatalError("no mem to resize arc list in addarc");

    slot = &src->s_arc[src->s_narcs];
    src->s_narcs += 1;
    slot->a_arrow = to;
    slot->a_lbl = lbl;
}
/* Return the index of the label (type, str) in `ll`, appending it first if
   no label with the same type and string exists yet.

   The string is copied with strdup().  Any allocation failure is fatal
   (Py_FatalError).  When Py_DebugFlag is set, each newly added label is
   printed. */
int
addlabel(labellist *ll, int type, char *str)
{
    int i;
    label *lb;

    for (i = 0; i < ll->ll_nlabels; i++) {
        if (ll->ll_label[i].lb_type == type &&
            strcmp(ll->ll_label[i].lb_str, str) == 0)
            return i;
    }
    ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label,
                                    sizeof(label) * (ll->ll_nlabels + 1));
    if (ll->ll_label == NULL)
        Py_FatalError("no mem to resize labellist in addlabel");
    lb = &ll->ll_label[ll->ll_nlabels++];
    lb->lb_type = type;
    lb->lb_str = strdup(str);
    /* A NULL string here would crash the strcmp() above on the next call. */
    if (lb->lb_str == NULL)
        Py_FatalError("no mem for label string in addlabel");
    if (Py_DebugFlag)
        /* %p requires a void * argument. */
        printf("Label @ %8p, %d: %s\n", (void *)ll, ll->ll_nlabels,
               PyGrammar_LabelRepr(lb));
    return lb - ll->ll_label;
}
/* Look up the first label of the given type in `ll` and return its index.
   Unlike addlabel() this never adds: a missing label is a fatal error.
   Only the type is compared; `str` is used solely in the error message. */
int
findlabel(labellist *ll, int type, char *str)
{
    int idx = 0;

    while (idx < ll->ll_nlabels) {
        if (ll->ll_label[idx].lb_type == type)
            return idx;
        idx++;
    }

    fprintf(stderr, "Label %d/'%s' not found\n", type, str);
    Py_FatalError("grammar.c:findlabel()");
    return 0; /* Make gcc -Wall happy */
}
/* Forward */
static void translabel(grammar *, label *);
/* Run translabel() on every label of the grammar except those at index
   EMPTY and below. */
void
translatelabels(grammar *g)
{
    int idx = EMPTY + 1;

#ifdef Py_DEBUG
    printf("Translating labels ...\n");
#endif

    /* Don't translate EMPTY */
    while (idx < g->g_ll.ll_nlabels) {
        translabel(g, &g->g_ll.ll_label[idx]);
        idx++;
    }
}
/* Rewrite a single label in place from its textual form into the form the
   parser uses.

   NAME labels:
     - a name equal to a DFA name becomes that DFA's (non-terminal) type;
     - otherwise a name equal to an entry of _PyParser_TokenNames becomes
       that token number;
     in both cases the string is freed and set to NULL.
   STRING labels (quoted text, lb_str[0] being the opening quote):
     - text starting with a letter or '_' is a keyword: the type becomes
       NAME and the string is replaced by a copy without the quotes;
     - otherwise the text is tried as a one-, two- or three-character
       operator, selected by where the closing quote is found; a recognised
       operator gets its token type and a NULL string.
   Anything that cannot be translated is reported with printf() and left
   unchanged. */
static void
translabel(grammar *g, label *lb)
{
int i;
if (Py_DebugFlag)
printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb));
if (lb->lb_type == NAME) {
/* First try: the name of a grammar rule (non-terminal). */
for (i = 0; i < g->g_ndfas; i++) {
if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
if (Py_DebugFlag)
printf(
"Label %s is non-terminal %d.\n",
lb->lb_str,
g->g_dfa[i].d_type);
lb->lb_type = g->g_dfa[i].d_type;
free(lb->lb_str);
lb->lb_str = NULL;
return;
}
}
/* Second try: the name of a token (terminal). */
for (i = 0; i < (int)N_TOKENS; i++) {
if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) {
if (Py_DebugFlag)
printf("Label %s is terminal %d.\n",
lb->lb_str, i);
lb->lb_type = i;
free(lb->lb_str);
lb->lb_str = NULL;
return;
}
}
printf("Can't translate NAME label '%s'\n", lb->lb_str);
return;
}
if (lb->lb_type == STRING) {
/* lb_str[1] is the first character after the opening quote. */
if (isalpha(Py_CHARMASK(lb->lb_str[1])) ||
lb->lb_str[1] == '_') {
char *p;
char *src;
char *dest;
size_t name_len;
if (Py_DebugFlag)
printf("Label %s is a keyword\n", lb->lb_str);
lb->lb_type = NAME;
src = lb->lb_str + 1;
/* The keyword ends at the next single quote, or at the end of the
   string when there is none. */
p = strchr(src, '\'');
if (p)
name_len = p - src;
else
name_len = strlen(src);
dest = (char *)malloc(name_len + 1);
if (!dest) {
printf("Can't alloc dest '%s'\n", src);
return;
}
/* strncpy() does not terminate here; the next line does. */
strncpy(dest, src, name_len);
dest[name_len] = '\0';
free(lb->lb_str);
lb->lb_str = dest;
}
/* Closing quote at index 2: one-character operator. */
else if (lb->lb_str[2] == lb->lb_str[0]) {
int type = (int) PyToken_OneChar(lb->lb_str[1]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
/* Closing quote at index 3: two-character operator. */
else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) {
int type = (int) PyToken_TwoChars(lb->lb_str[1],
lb->lb_str[2]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
/* Closing quote at index 4: three-character operator. */
else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) {
int type = (int) PyToken_ThreeChars(lb->lb_str[1],
lb->lb_str[2],
lb->lb_str[3]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
else
printf("Can't translate STRING label %s\n",
lb->lb_str);
}
else
printf("Can't translate label '%s'\n",
PyGrammar_LabelRepr(lb));
}

View File

@ -0,0 +1,57 @@
/* Grammar subroutines needed by parser */
#include "Python.h"
#include "pgenheaders.h"
#include "grammar.h"
#include "token.h"
/* Return the DFA for the given type.

   The active (#if 1) implementation indexes g->g_dfa directly with
   `type - NT_OFFSET` and only asserts that the entry found has the
   requested type; no range check is performed on `type`.

   NOTE(review): the disabled #else branch ends in assert(0) with no return
   statement, so it would fall off the end of the function when assertions
   are compiled out. */
dfa *
PyGrammar_FindDFA(grammar *g, register int type)
{
register dfa *d;
#if 1
/* Massive speed-up */
d = &g->g_dfa[type - NT_OFFSET];
assert(d->d_type == type);
return d;
#else
/* Old, slow version */
register int i;
for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) {
if (d->d_type == type)
return d;
}
assert(0);
/* NOTREACHED */
#endif
}
/* Return a printable representation of label `lb`.

   The result is a string literal, the label's own string, an entry of
   _PyParser_TokenNames, or a static buffer that is overwritten by the next
   call; callers must not free it and should use it before calling again. */
char *
PyGrammar_LabelRepr(label *lb)
{
    static char buf[100];
    int type = lb->lb_type;

    if (type == ENDMARKER)
        return "EMPTY";

    if (ISNONTERMINAL(type)) {
        if (lb->lb_str != NULL)
            return lb->lb_str;
        PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type);
        return buf;
    }

    /* Terminal: the bare token name, or "TOKEN(string)" when a string
       is attached. */
    if (lb->lb_str == NULL)
        return _PyParser_TokenNames[lb->lb_type];

    PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)",
                  _PyParser_TokenNames[lb->lb_type], lb->lb_str);
    return buf;
}

View File

@ -0,0 +1,66 @@
/* List a node on a file */
#include "pgenheaders.h"
#include "token.h"
#include "node.h"
/* Forward */
static void list1node(FILE *, node *);
static void listnode(FILE *, node *);
/* Write the source text represented by parse tree `n` to stdout. */
void
PyNode_ListTree(node *n)
{
listnode(stdout, n);
}
/* Listing state shared with list1node(): `level` is the current
   indentation depth, `atbol` is nonzero when the next token starts a
   line.  Being file-level statics, they make the listing code
   non-reentrant. */
static int level, atbol;
/* Reset the listing state and list the whole tree rooted at `n` on `fp`. */
static void
listnode(FILE *fp, node *n)
{
level = 0;
atbol = 1;
list1node(fp, n);
}
/* Recursively list node `n` on `fp`.

   Non-terminals just list their children in order.  INDENT and DEDENT
   tokens adjust the indentation level; every other terminal is printed,
   preceded by `level` tabs when it starts a line.  NEWLINE ends the line,
   other tokens are followed by a single space. */
static void
list1node(FILE *fp, node *n)
{
    int i;

    if (n == NULL)
        return;

    if (ISNONTERMINAL(TYPE(n))) {
        for (i = 0; i < NCH(n); i++)
            list1node(fp, CHILD(n, i));
        return;
    }

    if (!ISTERMINAL(TYPE(n))) {
        fprintf(fp, "? ");
        return;
    }

    if (TYPE(n) == INDENT) {
        ++level;
        return;
    }
    if (TYPE(n) == DEDENT) {
        --level;
        return;
    }

    if (atbol) {
        for (i = 0; i < level; ++i)
            fprintf(fp, "\t");
        atbol = 0;
    }
    if (TYPE(n) == NEWLINE) {
        if (STR(n) != NULL)
            fprintf(fp, "%s", STR(n));
        fprintf(fp, "\n");
        atbol = 1;
    }
    else {
        fprintf(fp, "%s ", STR(n));
    }
}

View File

@ -0,0 +1,159 @@
#include "pgenheaders.h"
#include "metagrammar.h"
#include "grammar.h"
#include "pgen.h"
/* Static tables describing the meta-grammar (the grammar in which grammar
   files themselves are written), returned by meta_grammar().

   Naming: arcs_D_S holds the arcs leaving state S of DFA number D, and
   states_D holds the states of DFA number D.
   NOTE(review): the field order of the initializers is defined by the
   struct declarations in grammar.h, which are not visible here.  Each arc
   appears to be {label index, target state}, each state {arc count, arcs},
   each dfa {type, name, initial state, state count, states, FIRST-set
   bitset}, and each label {type, string} -- confirm against grammar.h. */

/* DFA 0: MSTART */
static arc arcs_0_0[3] = {
{2, 0},
{3, 0},
{4, 1},
};
static arc arcs_0_1[1] = {
{0, 1},
};
static state states_0[2] = {
{3, arcs_0_0},
{1, arcs_0_1},
};
/* DFA 1: RULE */
static arc arcs_1_0[1] = {
{5, 1},
};
static arc arcs_1_1[1] = {
{6, 2},
};
static arc arcs_1_2[1] = {
{7, 3},
};
static arc arcs_1_3[1] = {
{3, 4},
};
static arc arcs_1_4[1] = {
{0, 4},
};
static state states_1[5] = {
{1, arcs_1_0},
{1, arcs_1_1},
{1, arcs_1_2},
{1, arcs_1_3},
{1, arcs_1_4},
};
/* DFA 2: RHS */
static arc arcs_2_0[1] = {
{8, 1},
};
static arc arcs_2_1[2] = {
{9, 0},
{0, 1},
};
static state states_2[2] = {
{1, arcs_2_0},
{2, arcs_2_1},
};
/* DFA 3: ALT */
static arc arcs_3_0[1] = {
{10, 1},
};
static arc arcs_3_1[2] = {
{10, 1},
{0, 1},
};
static state states_3[2] = {
{1, arcs_3_0},
{2, arcs_3_1},
};
/* DFA 4: ITEM */
static arc arcs_4_0[2] = {
{11, 1},
{13, 2},
};
static arc arcs_4_1[1] = {
{7, 3},
};
static arc arcs_4_2[3] = {
{14, 4},
{15, 4},
{0, 2},
};
static arc arcs_4_3[1] = {
{12, 4},
};
static arc arcs_4_4[1] = {
{0, 4},
};
static state states_4[5] = {
{2, arcs_4_0},
{1, arcs_4_1},
{3, arcs_4_2},
{1, arcs_4_3},
{1, arcs_4_4},
};
/* DFA 5: ATOM */
static arc arcs_5_0[3] = {
{5, 1},
{16, 1},
{17, 2},
};
static arc arcs_5_1[1] = {
{0, 1},
};
static arc arcs_5_2[1] = {
{7, 3},
};
static arc arcs_5_3[1] = {
{18, 1},
};
static state states_5[4] = {
{3, arcs_5_0},
{1, arcs_5_1},
{1, arcs_5_2},
{1, arcs_5_3},
};
/* The six DFAs, one per meta-grammar rule; types run from 256 to 261. */
static dfa dfas[6] = {
{256, "MSTART", 0, 2, states_0,
"\070\000\000"},
{257, "RULE", 0, 5, states_1,
"\040\000\000"},
{258, "RHS", 0, 2, states_2,
"\040\010\003"},
{259, "ALT", 0, 2, states_3,
"\040\010\003"},
{260, "ITEM", 0, 5, states_4,
"\040\010\003"},
{261, "ATOM", 0, 4, states_5,
"\040\000\003"},
};
/* The label table; the arc tables above refer to labels by their index in
   this array. */
static label labels[19] = {
{0, "EMPTY"},
{256, 0},
{257, 0},
{4, 0},
{0, 0},
{1, 0},
{11, 0},
{258, 0},
{259, 0},
{18, 0},
{260, 0},
{9, 0},
{10, 0},
{261, 0},
{16, 0},
{14, 0},
{3, 0},
{7, 0},
{8, 0},
};
/* The grammar object tying the tables together: 6 DFAs, 19 labels, and
   256 (the type of MSTART) as the last initializer. */
static grammar _PyParser_Grammar = {
6,
dfas,
{19, labels},
256
};
/* Return a pointer to the statically allocated meta-grammar.  The same
   object is returned on every call; callers must not free it. */
grammar *
meta_grammar(void)
{
return &_PyParser_Grammar;
}
/* Py_-prefixed alias for meta_grammar(); returns the same static object. */
grammar *
Py_meta_grammar(void)
{
return meta_grammar();
}

View File

@ -0,0 +1,218 @@
/* Readline interface for tokenizer.c and [raw_]input() in bltinmodule.c.
By default, or when stdin is not a tty device, we have a super
simple my_readline function using fgets.
Optionally, we can use the GNU readline library.
my_readline() has a different return value from GNU readline():
- NULL if an interrupt occurred or if an error occurred
- a malloc'ed empty string if EOF was read
- a malloc'ed string ending in \n normally
*/
#include "Python.h"
#ifdef MS_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include "windows.h"
#endif /* MS_WINDOWS */
#ifdef __VMS
extern char* vms__StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt);
#endif
/* Thread state of the thread currently inside PyOS_Readline(), or NULL
   when no read is in progress.  PyOS_Readline() sets and clears it;
   my_fgets() uses it to re-acquire the interpreter while checking
   signals. */
PyThreadState* _PyOS_ReadlineTState;
#ifdef WITH_THREAD
#include "pythread.h"
/* Lock held around the actual read in PyOS_Readline(); allocated lazily on
   the first call. */
static PyThread_type_lock _PyOS_ReadlineLock = NULL;
#endif
/* Optional hook; when non-NULL, my_fgets() calls it before every fgets(). */
int (*PyOS_InputHook)(void) = NULL;
#ifdef RISCOS
/* When nonzero, PyOS_StdioReadline() wraps the prompt in \x0c markers. */
int Py_RISCOSWimpFlag;
#endif
/* This function restarts a fgets() after an EINTR error occurred
except if PyOS_InterruptOccurred() returns true.

Reads one chunk of at most len-1 characters from `fp` into `buf`.
Return value:
   0  a string was read (no error)
   1  the read was interrupted
  -1  end of file
  -2  any other error */
static int
my_fgets(char *buf, int len, FILE *fp)
{
char *p;
#ifdef MS_WINDOWS
int i;
#endif
while (1) {
if (PyOS_InputHook != NULL)
(void)(PyOS_InputHook)();
/* Clear stale error state so the checks below reflect this fgets() only. */
errno = 0;
clearerr(fp);
p = fgets(buf, len, fp);
if (p != NULL)
return 0; /* No error */
#ifdef MS_WINDOWS
/* Ctrl-C anywhere on the line or Ctrl-Z if the only character
on a line will set ERROR_OPERATION_ABORTED. Under normal
circumstances Ctrl-C will also have caused the SIGINT handler
to fire. This signal fires in another thread and is not
guaranteed to have occurred before this point in the code.
Therefore: check in a small loop to see if the trigger has
fired, in which case assume this is a Ctrl-C event. If it
hasn't fired within 10ms assume that this is a Ctrl-Z on its
own or that the signal isn't going to fire for some other
reason and drop through to check for EOF.
*/
if (GetLastError()==ERROR_OPERATION_ABORTED) {
for (i = 0; i < 10; i++) {
if (PyOS_InterruptOccurred())
return 1;
Sleep(1);
}
}
#endif /* MS_WINDOWS */
if (feof(fp)) {
clearerr(fp);
return -1; /* EOF */
}
#ifdef EINTR
if (errno == EINTR) {
int s;
/* PyErr_CheckSignals() is run with the thread state saved by
   PyOS_Readline() restored, and released again afterwards. */
#ifdef WITH_THREAD
PyEval_RestoreThread(_PyOS_ReadlineTState);
#endif
s = PyErr_CheckSignals();
#ifdef WITH_THREAD
PyEval_SaveThread();
#endif
if (s < 0)
return 1;
/* try again */
continue;
}
#endif
if (PyOS_InterruptOccurred()) {
return 1; /* Interrupt */
}
return -2; /* Error */
}
/* NOTREACHED */
}
/* Readline implementation using fgets().

   Writes `prompt` (if not NULL) to stderr, then reads one complete line
   from `sys_stdin`, growing the buffer as needed.

   Returns a buffer obtained from PyMem_MALLOC/PyMem_REALLOC that the caller
   must release with PyMem_FREE:
     - the line including its trailing '\n' in the normal case;
     - an empty string on EOF or read error on the first chunk;
     - whatever was read so far if a later chunk fails.
   Returns NULL (with nothing left allocated) on interrupt, when memory runs
   out, or when the line is too long for fgets()'s int length argument; in
   the last case OverflowError is also set. */
char *
PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt)
{
    size_t n;
    char *p;
    char *resized;

    n = 100;
    if ((p = (char *)PyMem_MALLOC(n)) == NULL)
        return NULL;
    fflush(sys_stdout);
#ifndef RISCOS
    if (prompt)
        fprintf(stderr, "%s", prompt);
#else
    if (prompt) {
        if(Py_RISCOSWimpFlag)
            fprintf(stderr, "\x0cr%s\x0c", prompt);
        else
            fprintf(stderr, "%s", prompt);
    }
#endif
    fflush(stderr);
    switch (my_fgets(p, (int)n, sys_stdin)) {
    case 0: /* Normal case */
        break;
    case 1: /* Interrupt */
        PyMem_FREE(p);
        return NULL;
    case -1: /* EOF */
    case -2: /* Error */
    default: /* Shouldn't happen */
        *p = '\0';
        break;
    }
    n = strlen(p);
    /* Keep reading chunks until the buffer ends with a newline. */
    while (n > 0 && p[n-1] != '\n') {
        size_t incr = n+2;

        /* `incr` is passed to fgets() as an int; refuse to truncate it. */
        if (incr > INT_MAX) {
            PyMem_FREE(p);
            /* NOTE(review): PyOS_Readline() calls this function between
               Py_BEGIN/END_ALLOW_THREADS; confirm setting an exception
               here is safe in that context. */
            PyErr_SetString(PyExc_OverflowError, "input line too long");
            return NULL;
        }
        /* Keep the old pointer until the resize is known to have worked,
           so the buffer can be released on failure instead of leaking. */
        resized = (char *)PyMem_REALLOC(p, n + incr);
        if (resized == NULL) {
            PyMem_FREE(p);
            return NULL;
        }
        p = resized;
        if (my_fgets(p+n, (int)incr, sys_stdin) != 0)
            break;
        n += strlen(p+n);
    }
    /* Trim to the exact size.  If the shrink fails the larger buffer is
       still valid and still holds the line, so hand that back instead. */
    resized = (char *)PyMem_REALLOC(p, n+1);
    return resized != NULL ? resized : p;
}
/* By initializing this function pointer, systems embedding Python can
override the readline function.
Note: Python expects in return a buffer allocated with PyMem_Malloc. */
/* Replaceable line-reading function.  When still NULL on the first call of
   PyOS_Readline(), it is set to the built-in stdio implementation
   (vms__StdioReadline on VMS). */
char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, char *);
/* Interface used by tokenizer.c and bltinmodule.c */
/* Read one line from `sys_stdin`, showing `prompt`.

   Returns whatever the underlying readline function returns, or NULL with
   RuntimeError set when the calling thread is already inside
   PyOS_Readline().  The read itself runs between Py_BEGIN_ALLOW_THREADS and
   Py_END_ALLOW_THREADS and, when built WITH_THREAD, under
   _PyOS_ReadlineLock. */
char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt)
{
char *rv;
/* Re-entrancy guard: _PyOS_ReadlineTState names the thread that is
   currently reading. */
if (_PyOS_ReadlineTState == PyThreadState_GET()) {
PyErr_SetString(PyExc_RuntimeError,
"can't re-enter readline");
return NULL;
}
if (PyOS_ReadlineFunctionPointer == NULL) {
#ifdef __VMS
PyOS_ReadlineFunctionPointer = vms__StdioReadline;
#else
PyOS_ReadlineFunctionPointer = PyOS_StdioReadline;
#endif
}
#ifdef WITH_THREAD
/* NOTE(review): the result of PyThread_allocate_lock() is not checked. */
if (_PyOS_ReadlineLock == NULL) {
_PyOS_ReadlineLock = PyThread_allocate_lock();
}
#endif
_PyOS_ReadlineTState = PyThreadState_GET();
Py_BEGIN_ALLOW_THREADS
#ifdef WITH_THREAD
PyThread_acquire_lock(_PyOS_ReadlineLock, 1);
#endif
/* This is needed to handle the unlikely case that the
* interpreter is in interactive mode *and* stdin/out are not
* a tty. This can happen, for example if python is run like
* this: python -i < test1.py
*/
if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout)))
rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt);
else
rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout,
prompt);
Py_END_ALLOW_THREADS
#ifdef WITH_THREAD
PyThread_release_lock(_PyOS_ReadlineLock);
#endif
/* Reading is over: allow this thread to call PyOS_Readline() again. */
_PyOS_ReadlineTState = NULL;
return rv;
}

View File

@ -0,0 +1,164 @@
/* Parse tree node implementation */
#include "Python.h"
#include "node.h"
#include "errcode.h"
node *
PyNode_New(int type)
{
node *n = (node *) PyObject_MALLOC(1 * sizeof(node));
if (n == NULL)
return NULL;
n->n_type = type;
n->n_str = NULL;
n->n_lineno = 0;
n->n_nchildren = 0;
n->n_child = NULL;
return n;
}
/* See comments at XXXROUNDUP below.
   Round `n` (which must be > 128) up to the closest power of 2 >= n,
   starting from 256.  Returns -1 if that power of 2 does not fit in an
   int. */
static int
fancy_roundup(int n)
{
    int result = 256;
    assert(n > 128);
    while (result < n) {
        /* Check before doubling: overflowing a signed left shift is
           undefined behaviour, so it cannot be detected afterwards. */
        if (result > INT_MAX / 2)
            return -1;
        result <<= 1;
    }
    return result;
}
/* A gimmick to make massive numbers of reallocs quicker. The result is
* a number >= the input. In PyNode_AddChild, it's used like so, when
* we're about to add child number current_size + 1:
*
* if XXXROUNDUP(current_size) < XXXROUNDUP(current_size + 1):
* allocate space for XXXROUNDUP(current_size + 1) total children
* else:
* we already have enough space
*
* Since a node starts out empty, we must have
*
* XXXROUNDUP(0) < XXXROUNDUP(1)
*
* so that we allocate space for the first child. One-child nodes are very
* common (presumably that would change if we used a more abstract form
* of syntax tree), so to avoid wasting memory it's desirable that
* XXXROUNDUP(1) == 1. That in turn forces XXXROUNDUP(0) == 0.
*
* Else for 2 <= n <= 128, we round up to the closest multiple of 4. Why 4?
* Rounding up to a multiple of an exact power of 2 is very efficient, and
* most nodes with more than one child have <= 4 kids.
*
* Else we call fancy_roundup() to grow proportionately to n. We've got an
* extreme case then (like test_longexp.py), and on many platforms doing
* anything less than proportional growth leads to exorbitant runtime
* (e.g., MacPython), or extreme fragmentation of user address space (e.g.,
* Win98).
*
* In a run of compileall across the 2.3a0 Lib directory, Andrew MacIntyre
* reported that, with this scheme, 89% of PyObject_REALLOC calls in
* PyNode_AddChild passed 1 for the size, and 9% passed 4. So this usually
* wastes very little memory, but is very effective at sidestepping
* platform-realloc disasters on vulnerable platforms.
*
* Note that this would be straightforward if a node stored its current
* capacity. The code is tricky to avoid that.
*/
/* Capacity for n children: n itself for n <= 1, the next multiple of 4 for
   n <= 128, otherwise fancy_roundup(n), which may return -1.  The argument
   is evaluated more than once, so it must have no side effects. */
#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \
(n) <= 128 ? (((n) + 3) & ~3) : \
fancy_roundup(n))
/* Append a new, childless child node to `n1`.

   The child gets the given type, string, line number and column offset;
   `str` is stored as-is, not copied.  The child array is only reallocated
   when the rounded-up capacity for nch+1 children exceeds the rounded-up
   capacity for nch children (see XXXROUNDUP).

   Returns 0 on success, E_OVERFLOW if the child count cannot grow, or
   E_NOMEM if the array cannot be enlarged. */
int
PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset)
{
    const int nch = n1->n_nchildren;
    int have;
    int need;
    node *child;

    if (nch == INT_MAX || nch < 0)
        return E_OVERFLOW;

    have = XXXROUNDUP(nch);
    need = XXXROUNDUP(nch + 1);
    if (have < 0 || need < 0)
        return E_OVERFLOW;

    if (have < need) {
        node *grown;

        if (need > PY_SIZE_MAX / sizeof(node)) {
            return E_NOMEM;
        }
        grown = (node *) PyObject_REALLOC(n1->n_child,
                                          need * sizeof(node));
        if (grown == NULL)
            return E_NOMEM;
        n1->n_child = grown;
    }

    child = &n1->n_child[nch];
    n1->n_nchildren = nch + 1;

    child->n_type = type;
    child->n_str = str;
    child->n_lineno = lineno;
    child->n_col_offset = col_offset;
    child->n_nchildren = 0;
    child->n_child = NULL;
    return 0;
}
/* Forward */
static void freechildren(node *);
static Py_ssize_t sizeofchildren(node *n);
/* Free a parse tree: everything below `n`, then `n` itself.
   A NULL argument is ignored. */
void
PyNode_Free(node *n)
{
    if (n == NULL)
        return;

    freechildren(n);
    PyObject_FREE(n);
}
/* Return the number of bytes attributed to the tree rooted at `n`: the
   node itself plus everything counted by sizeofchildren().
   Returns 0 for a NULL tree. */
Py_ssize_t
_PyNode_SizeOf(node *n)
{
    if (n == NULL)
        return 0;
    return sizeof(node) + sizeofchildren(n);
}
/* Recursively free what node `n` owns: its descendants (last child
   first), its child array and its string.  The node itself is not
   freed. */
static void
freechildren(node *n)
{
    int i = NCH(n);

    while (i-- > 0)
        freechildren(CHILD(n, i));

    if (n->n_child != NULL)
        PyObject_FREE(n->n_child);
    if (STR(n) != NULL)
        PyObject_FREE(STR(n));
}
/* Return the number of bytes owned by node `n`, excluding the node
   structure itself: the same quantity for every descendant, plus the
   child array at its rounded-up capacity, plus the string including its
   terminating NUL. */
static Py_ssize_t
sizeofchildren(node *n)
{
    Py_ssize_t total = 0;
    int i = NCH(n);

    while (i-- > 0)
        total += sizeofchildren(CHILD(n, i));

    /* allocated size of n->n_child array */
    if (n->n_child != NULL)
        total += XXXROUNDUP(NCH(n)) * sizeof(node);
    if (STR(n) != NULL)
        total += strlen(STR(n)) + 1;
    return total;
}

View File

@ -0,0 +1,436 @@
/* Parser implementation */
/* For a description, see the comments at end of this file */
/* XXX To do: error recovery */
#include "Python.h"
#include "pgenheaders.h"
#include "token.h"
#include "grammar.h"
#include "node.h"
#include "parser.h"
#include "errcode.h"
/* D(x): debug-only statement.  In Py_DEBUG builds `x` runs only when
   Py_DebugFlag is nonzero; the "if (!flag); else x" form lets D(...) be
   followed by a semicolon without capturing a following `else`.  In other
   builds D(x) expands to nothing. */
#ifdef Py_DEBUG
extern int Py_DebugFlag;
#define D(x) if (!Py_DebugFlag); else x
#else
#define D(x)
#endif
/* STACK DATA TYPE */
/* The stack grows downwards inside s_base[]: it is empty when s_top points
   one past the last element (&s_base[MAXSTACK]) and full when s_top equals
   s_base (see s_push). */
static void s_reset(stack *);
/* Make the stack empty. */
static void
s_reset(stack *s)
{
s->s_top = &s->s_base[MAXSTACK];
}
/* True when the stack holds no entries. */
#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
/* Push a new entry for DFA `d`, starting in state 0 and adding nodes under
   `parent`.  Returns 0 on success; when the stack is full, prints a
   message on stderr and returns E_NOMEM. */
static int
s_push(register stack *s, dfa *d, node *parent)
{
    register stackentry *slot;

    if (s->s_top == s->s_base) {
        fprintf(stderr, "s_push: parser stack overflow\n");
        return E_NOMEM;
    }

    s->s_top -= 1;
    slot = s->s_top;
    slot->s_state = 0;
    slot->s_parent = parent;
    slot->s_dfa = d;
    return 0;
}
#ifdef Py_DEBUG
/* Discard the top stack entry.  The debug build treats popping an empty
   stack as a fatal error. */
static void
s_pop(register stack *s)
{
    if (s_empty(s))
        Py_FatalError("s_pop: parser stack underflow -- FATAL");
    s->s_top += 1;
}
#else /* !Py_DEBUG */
/* Discard the top stack entry; no underflow check in non-debug builds. */
#define s_pop(s) (s)->s_top++
#endif
/* PARSER CREATION */
/* Create a parser for grammar `g` that parses starting at symbol `start`.

   Accelerators are added to the grammar first if it has none.  The new
   parser owns an empty root node of type `start`, and its stack holds a
   single entry for the start symbol's DFA.
   Returns NULL if memory cannot be allocated. */
parser_state *
PyParser_New(grammar *g, int start)
{
    parser_state *ps;
    node *root;

    if (!g->g_accel)
        PyGrammar_AddAccelerators(g);

    ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
    if (ps == NULL)
        return NULL;
    ps->p_grammar = g;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    ps->p_flags = 0;
#endif

    root = PyNode_New(start);
    ps->p_tree = root;
    if (root == NULL) {
        PyMem_FREE(ps);
        return NULL;
    }

    s_reset(&ps->p_stack);
    (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), root);
    return ps;
}
/* Destroy a parser together with the parse tree it owns. */
void
PyParser_Delete(parser_state *ps)
{
/* NB If you want to save the parse tree,
you must set p_tree to NULL before calling PyParser_Delete()!
(PyNode_Free() ignores a NULL tree.) */
PyNode_Free(ps->p_tree);
PyMem_FREE(ps);
}
/* PARSER STACK OPERATIONS */
/* Shift a terminal: add a leaf node (type, str) under the parent of the
   top stack entry and move that entry to `newstate`.
   Returns 0 on success or the error code from PyNode_AddChild(), in which
   case the state is left unchanged. */
static int
shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
{
    int status;

    assert(!s_empty(s));
    status = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
    if (status == 0)
        s->s_top->s_state = newstate;
    return status;
}
/* Push a non-terminal: add an interior node of the given type under the
   parent of the top stack entry, move that entry to `newstate`, then push
   a new stack entry for DFA `d` whose parent is the node just added.
   Returns 0 on success, the error code from PyNode_AddChild(), or E_NOMEM
   from s_push() when the stack is full. */
static int
push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
{
    int err;
    register node *n;

    /* Check the precondition before s_top is dereferenced, as shift()
       does. */
    assert(!s_empty(s));
    n = s->s_top->s_parent;
    err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
    if (err)
        return err;
    s->s_top->s_state = newstate;
    /* The node just added is the last child of n. */
    return s_push(s, d, CHILD(n, NCH(n)-1));
}
/* PARSER PROPER */
/* Map a token (type, str) to the index of its label in the grammar.

   A NAME token is first matched against keyword labels (type NAME with a
   string equal to `str`).  If that does not apply, the first label with
   the token's type and no string is used.
   Returns the label index, or -1 when the token matches no label. */
static int
classify(parser_state *ps, int type, char *str)
{
    grammar *g = ps->p_grammar;
    label *labels = g->g_ll.ll_label;
    int nlabels = g->g_ll.ll_nlabels;
    int idx;

    if (type == NAME) {
        for (idx = 0; idx < nlabels; idx++) {
            label *cand = &labels[idx];

            if (cand->lb_type != NAME || cand->lb_str == NULL)
                continue;
            /* Cheap first-character test before the full comparison. */
            if (cand->lb_str[0] != str[0] ||
                strcmp(cand->lb_str, str) != 0)
                continue;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
            if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
                str[0] == 'p' && strcmp(str, "print") == 0) {
                break; /* no longer a keyword */
            }
#endif
            D(printf("It's a keyword\n"));
            return idx;
        }
    }

    for (idx = 0; idx < nlabels; idx++) {
        if (labels[idx].lb_type == type && labels[idx].lb_str == NULL) {
            D(printf("It's a token we know\n"));
            return idx;
        }
    }

    D(printf("Illegal token\n"));
    return -1;
}
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
/* Inspect the import statement under the top stack entry and, if it is a
   "from __future__ import ..." form, record the with_statement,
   print_function and unicode_literals features in ps->p_flags. */
static void
future_hack(parser_state *ps)
{
    node *stmt = CHILD(ps->p_stack.s_top->s_parent, 0);
    node *part;
    node *names;
    int i;

    /* from __future__ import ..., must have at least 4 children */
    if (NCH(stmt) < 4)
        return;

    part = CHILD(stmt, 0);
    if (STR(part) == NULL || strcmp(STR(part), "from") != 0)
        return;

    part = CHILD(stmt, 1);
    if (NCH(part) == 1 && STR(CHILD(part, 0)) &&
        strcmp(STR(CHILD(part, 0)), "__future__") != 0)
        return;

    /* names can be a star, a parenthesis or import_as_names */
    names = CHILD(stmt, 3);
    if (TYPE(names) == STAR)
        return;
    if (TYPE(names) == LPAR)
        names = CHILD(stmt, 4);

    /* Step by two: only every other child is examined. */
    for (i = 0; i < NCH(names); i += 2) {
        node *entry = CHILD(names, i);
        char *feature;

        if (NCH(entry) < 1 || TYPE(CHILD(entry, 0)) != NAME)
            continue;

        feature = STR(CHILD(entry, 0));
        if (strcmp(feature, FUTURE_WITH_STATEMENT) == 0) {
            ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
        }
        else if (strcmp(feature, FUTURE_PRINT_FUNCTION) == 0) {
            ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
        }
        else if (strcmp(feature, FUTURE_UNICODE_LITERALS) == 0) {
            ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
        }
    }
}
#endif /* future keyword */
/* Feed one token to the parser.

   The token is classified into a grammar label, then the current DFA state
   is consulted (through its accelerator table) until the token has been
   shifted or an error is found.  Each accelerator entry is either -1 (no
   transition), a plain state number (shift the token and go to that state),
   or a value with bit 7 set (push the non-terminal `(x >> 8) + NT_OFFSET`
   and continue in state `x & 0x7f` once it is done).

   Returns:
     E_OK      token consumed, more input expected;
     E_DONE    token consumed and the parser stack became empty;
     E_SYNTAX  token not acceptable here (or not a known token);
     otherwise the positive error code returned by push() or shift().
   On the "stuck" syntax-error path, when `expected_ret` is not NULL,
   *expected_ret receives the type of the single label the state could
   accept, or -1 when there is not exactly one. */
int
PyParser_AddToken(register parser_state *ps, register int type, char *str,
int lineno, int col_offset, int *expected_ret)
{
register int ilabel;
int err;
D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
/* Find out which label this token is */
ilabel = classify(ps, type, str);
if (ilabel < 0)
return E_SYNTAX;
/* Loop until the token is shifted or an error occurred */
for (;;) {
/* Fetch the current dfa and state */
register dfa *d = ps->p_stack.s_top->s_dfa;
register state *s = &d->d_state[ps->p_stack.s_top->s_state];
D(printf(" DFA '%s', state %d:",
d->d_name, ps->p_stack.s_top->s_state));
/* Check accelerator */
/* The table only covers labels in [s_lower, s_upper). */
if (s->s_lower <= ilabel && ilabel < s->s_upper) {
register int x = s->s_accel[ilabel - s->s_lower];
if (x != -1) {
if (x & (1<<7)) {
/* Push non-terminal */
int nt = (x >> 8) + NT_OFFSET;
int arrow = x & ((1<<7)-1);
dfa *d1 = PyGrammar_FindDFA(
ps->p_grammar, nt);
if ((err = push(&ps->p_stack, nt, d1,
arrow, lineno, col_offset)) > 0) {
D(printf(" MemError: push\n"));
return err;
}
D(printf(" Push ...\n"));
/* The token is not consumed yet: retry it in the pushed DFA. */
continue;
}
/* Shift the token */
if ((err = shift(&ps->p_stack, type, str,
x, lineno, col_offset)) > 0) {
D(printf(" MemError: shift.\n"));
return err;
}
D(printf(" Shift.\n"));
/* Pop while we are in an accept-only state */
/* (accepting, and its only arc is the one that marks acceptance) */
while (s = &d->d_state
[ps->p_stack.s_top->s_state],
s->s_accept && s->s_narcs == 1) {
D(printf(" DFA '%s', state %d: "
"Direct pop.\n",
d->d_name,
ps->p_stack.s_top->s_state));
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (d->d_name[0] == 'i' &&
strcmp(d->d_name,
"import_stmt") == 0)
future_hack(ps);
#endif
s_pop(&ps->p_stack);
if (s_empty(&ps->p_stack)) {
D(printf(" ACCEPT.\n"));
return E_DONE;
}
d = ps->p_stack.s_top->s_dfa;
}
return E_OK;
}
}
/* No transition on this token.  If the state is accepting, the current
   rule is complete: pop it and retry the token one level up. */
if (s->s_accept) {
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (d->d_name[0] == 'i' &&
strcmp(d->d_name, "import_stmt") == 0)
future_hack(ps);
#endif
/* Pop this dfa and try again */
s_pop(&ps->p_stack);
D(printf(" Pop ...\n"));
if (s_empty(&ps->p_stack)) {
D(printf(" Error: bottom of stack.\n"));
return E_SYNTAX;
}
continue;
}
/* Stuck, report syntax error */
D(printf(" Error.\n"));
if (expected_ret) {
if (s->s_lower == s->s_upper - 1) {
/* Only one possible expected token */
*expected_ret = ps->p_grammar->
g_ll.ll_label[s->s_lower].lb_type;
}
else
*expected_ret = -1;
}
return E_SYNTAX;
}
}
#ifdef Py_DEBUG
/* DEBUG OUTPUT */
/* Print tree `n` on stdout in nested form: each node's label
   representation, followed for non-terminals by its children in
   parentheses, separated by commas.  A NULL tree prints as "NIL".
   `g` is only passed along to the recursive calls. */
void
dumptree(grammar *g, node *n)
{
    label repr;
    int i;

    if (n == NULL) {
        printf("NIL");
        return;
    }

    repr.lb_type = TYPE(n);
    repr.lb_str = STR(n);
    printf("%s", PyGrammar_LabelRepr(&repr));

    if (!ISNONTERMINAL(TYPE(n)))
        return;

    printf("(");
    for (i = 0; i < NCH(n); i++) {
        if (i > 0)
            printf(",");
        dumptree(g, CHILD(n, i));
    }
    printf(")");
}
/* Print the terminals of tree `n` on stdout, left to right, as token
   names separated by spaces; NUMBER and NAME tokens also show their text
   in parentheses.  `g` is only passed along to the recursive calls. */
void
showtree(grammar *g, node *n)
{
    int i;

    if (n == NULL)
        return;

    if (ISNONTERMINAL(TYPE(n))) {
        for (i = 0; i < NCH(n); i++)
            showtree(g, CHILD(n, i));
        return;
    }

    if (!ISTERMINAL(TYPE(n))) {
        printf("? ");
        return;
    }

    printf("%s", _PyParser_TokenNames[TYPE(n)]);
    if (TYPE(n) == NUMBER || TYPE(n) == NAME)
        printf("(%s)", STR(n));
    printf(" ");
}
/* Print the parser's tree on stdout: the nested dump and the token list
   only when Py_DebugFlag is set, the source listing always. */
void
printtree(parser_state *ps)
{
    grammar *g = ps->p_grammar;
    node *root = ps->p_tree;

    if (Py_DebugFlag) {
        printf("Parse tree:\n");
        dumptree(g, root);
        printf("\n");
        printf("Tokens:\n");
        showtree(g, root);
        printf("\n");
    }

    printf("Listing:\n");
    PyNode_ListTree(root);
    printf("\n");
}
#endif /* Py_DEBUG */
/*
Description
-----------
The parser's interface is different than usual: the function addtoken()
must be called for each token in the input. This makes it possible to
turn it into an incremental parsing system later. The parsing system
constructs a parse tree as it goes.
A parsing rule is represented as a Deterministic Finite-state Automaton
(DFA). A node in a DFA represents a state of the parser; an arc represents
a transition. Transitions are either labeled with terminal symbols or
with non-terminals. When the parser decides to follow an arc labeled
with a non-terminal, it is invoked recursively with the DFA representing
the parsing rule for that as its initial state; when that DFA accepts,
the parser that invoked it continues. The parse tree constructed by the
recursively called parser is inserted as a child in the current parse tree.
The DFA's can be constructed automatically from a more conventional
language description. An extended LL(1) grammar (ELL(1)) is suitable.
Certain restrictions make the parser's life easier: rules that can produce
the empty string should be outlawed (there are other ways to put loops
or optional parts in the language). To avoid the need to construct
FIRST sets, we can require that all but the last alternative of a rule
(really: arc going out of a DFA's state) must begin with a terminal
symbol.
As an example, consider this grammar:
expr: term (OP term)*
term: CONSTANT | '(' expr ')'
The DFA corresponding to the rule for expr is:
------->.---term-->.------->
        ^          |
        |          |
        \----OP----/
The parse tree generated for the input a+b is:
(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
*/

View File

@ -0,0 +1,42 @@
#ifndef Py_PARSER_H
#define Py_PARSER_H
#ifdef __cplusplus
extern "C" {
#endif
/* Parser interface */
#define MAXSTACK 1500
/* One entry of the parser stack: a DFA, the state reached inside that
   DFA, and the tree node that receives the next child. */
typedef struct {
int s_state; /* State in current DFA (index into s_dfa->d_state) */
dfa *s_dfa; /* Current DFA */
struct _node *s_parent; /* Where to add next node */
} stackentry;
/* Parser stack with a fixed capacity of MAXSTACK entries, stored inline
   in the structure. */
typedef struct {
stackentry *s_top; /* Top entry */
stackentry s_base[MAXSTACK];/* Array of stack entries */
/* NB The stack grows down */
} stack;
/* State of one parse in progress: the DFA stack, the grammar being
   parsed and the tree built so far. */
typedef struct {
stack p_stack; /* Stack of parser states */
grammar *p_grammar; /* Grammar to use */
/* parsetok.c takes the finished tree by copying this pointer and setting
   it to NULL before calling PyParser_Delete(). */
node *p_tree; /* Top of parse tree */
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
/* parsetok.c ORs CO_FUTURE_* bits in here and copies the final value
   back to its caller's flags. */
unsigned long p_flags; /* see co_flags in Include/code.h */
#endif
} parser_state;
/* Create a parser for grammar g starting at `start`.  Returns NULL on
   failure; parsetok.c reports that case as E_NOMEM. */
parser_state *PyParser_New(grammar *g, int start);
/* Dispose of a parser returned by PyParser_New(); parsetok.c calls it
   once it has finished feeding tokens. */
void PyParser_Delete(parser_state *ps);
/* Feed one token to the parser.  Returns E_OK when the token was consumed
   and parsing continues, E_DONE when the input has been accepted, and an
   error code such as E_SYNTAX otherwise.  If expected_ret is non-NULL,
   the "stuck" syntax-error path stores in it the single token type that
   would have been accepted, or -1 when there is more than one. */
int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset,
int *expected_ret);
/* Precompute the accelerator tables for every DFA state in g and mark the
   grammar as accelerated (g_accel = 1). */
void PyGrammar_AddAccelerators(grammar *g);
#ifdef __cplusplus
}
#endif
#endif /* !Py_PARSER_H */

View File

@ -0,0 +1,282 @@
/* Parser-tokenizer link implementation */
#include "pgenheaders.h"
#include "tokenizer.h"
#include "node.h"
#include "grammar.h"
#include "parser.h"
#include "parsetok.h"
#include "errcode.h"
#include "graminit.h"
int Py_TabcheckFlag;
/* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static void initerr(perrdetail *err_ret, const char* filename);
/* Parse input coming from a string.

   Convenience form of PyParser_ParseStringFlagsFilename() with no
   filename and no flags.  Returns whatever that function returns (a
   parse tree, or NULL with the details left in *err_ret). */
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
{
    const char *no_filename = NULL;
    int no_flags = 0;

    return PyParser_ParseStringFlagsFilename(s, no_filename, g, start,
                                             err_ret, no_flags);
}
/* Parse a string with the given parser flags and no filename; forwards
   to PyParser_ParseStringFlagsFilename(). */
node *
PyParser_ParseStringFlags(const char *s, grammar *g, int start,
                          perrdetail *err_ret, int flags)
{
    const char *no_filename = NULL;

    return PyParser_ParseStringFlagsFilename(s, no_filename, g, start,
                                             err_ret, flags);
}
/* Parse a string with the given flags and filename.

   The Ex variant takes its flags through a pointer and may write to
   them, so a local copy is passed; the caller's value is not changed. */
node *
PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
                                  grammar *g, int start,
                                  perrdetail *err_ret, int flags)
{
    int flags_copy;
    node *tree;

    flags_copy = flags;
    tree = PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
                                               err_ret, &flags_copy);
    return tree;
}
/* Parse the string s: reset *err_ret, build a string tokenizer and hand
   it to parsetok().

   If the tokenizer cannot be created, NULL is returned and
   err_ret->error is E_DECODE when a Python exception is pending,
   otherwise E_NOMEM.  A NULL filename is reported to the tokenizer as
   "<string>".  *flags is passed through to parsetok(). */
node *
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
                                    grammar *g, int start,
                                    perrdetail *err_ret, int *flags)
{
    struct tok_state *tok;

    initerr(err_ret, filename);

    tok = PyTokenizer_FromString(s, start == file_input);
    if (tok == NULL) {
        if (PyErr_Occurred())
            err_ret->error = E_DECODE;
        else
            err_ret->error = E_NOMEM;
        return NULL;
    }

    if (filename)
        tok->filename = filename;
    else
        tok->filename = "<string>";

    /* Tab-consistency checking is switched on by either global flag;
       a Py_TabcheckFlag of 2 or more additionally raises alterror. */
    if (Py_TabcheckFlag || Py_VerboseFlag) {
        tok->altwarning = (tok->filename != NULL);
        if (Py_TabcheckFlag >= 2)
            tok->alterror++;
    }

    return parsetok(tok, g, start, err_ret, flags);
}
/* Parse input coming from a file.

   Convenience form of PyParser_ParseFileFlags() with no flags.  Returns
   whatever that function returns (a parse tree, or NULL with the details
   left in *err_ret). */
node *
PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
                   char *ps1, char *ps2, perrdetail *err_ret)
{
    int no_flags = 0;

    return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2,
                                   err_ret, no_flags);
}
/* Parse a file with the given flags.

   The Ex variant takes its flags through a pointer and may write to
   them, so a local copy is passed; the caller's value is not changed. */
node *
PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start,
                        char *ps1, char *ps2, perrdetail *err_ret, int flags)
{
    int flags_copy;
    node *tree;

    flags_copy = flags;
    tree = PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2,
                                     err_ret, &flags_copy);
    return tree;
}
/* Parse the file fp: reset *err_ret, build a file tokenizer (ps1 and ps2
   are handed to PyTokenizer_FromFile) and pass it to parsetok().

   If the tokenizer cannot be created, NULL is returned with
   err_ret->error set to E_NOMEM.  *flags is passed through to
   parsetok(). */
node *
PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start,
                          char *ps1, char *ps2, perrdetail *err_ret, int *flags)
{
    struct tok_state *tok;

    initerr(err_ret, filename);

    tok = PyTokenizer_FromFile(fp, ps1, ps2);
    if (tok == NULL) {
        err_ret->error = E_NOMEM;
        return NULL;
    }

    tok->filename = filename;

    /* Tab-consistency checking is switched on by either global flag;
       the warning itself needs a filename.  A Py_TabcheckFlag of 2 or
       more additionally raises alterror. */
    if (Py_TabcheckFlag || Py_VerboseFlag) {
        tok->altwarning = (filename != NULL);
        if (Py_TabcheckFlag >= 2)
            tok->alterror++;
    }

    return parsetok(tok, g, start, err_ret, flags);
}
#if 0
/* Compiled out.  Message templates and a helper for warning that 'with'
   and 'as' would become keywords in Python 2.6; no caller is visible in
   this file. */
static char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";
static char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";
/* Write msg, formatted with filename and lineno, through
   PySys_WriteStderr().  A NULL filename is shown as "<string>". */
static void
warn(const char *msg, const char *filename, int lineno)
{
if (filename == NULL)
filename = "<string>";
PySys_WriteStderr(msg, filename, lineno);
}
#endif
/* Parse input coming from the given tokenizer structure.

   tok       Tokenizer to read from.  It is freed with PyTokenizer_Free()
             on every path out of this function.
   g, start  Grammar and start symbol handed to PyParser_New().
   err_ret   Receives the error code.  When no tree is produced it also
             receives the line number and, if the tokenizer has a buffer,
             the offset and a heap copy of the current input text.
   flags     PyPARSE_* flags on input.  When
             PY_PARSER_REQUIRES_FUTURE_KEYWORD is defined, *flags is
             overwritten with the parser's final p_flags.

   Returns the parse tree, or NULL on error.  If the tokenizer recorded
   an encoding, the tree is wrapped in an encoding_decl node whose n_str
   holds a copy of the encoding name. */
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         int *flags)
{
    parser_state *ps;
    node *n;
    int started = 0;

    if ((ps = PyParser_New(g, start)) == NULL) {
        fprintf(stderr, "no mem for new parser\n");
        err_ret->error = E_NOMEM;
        PyTokenizer_Free(tok);
        return NULL;
    }
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    if (*flags & PyPARSE_PRINT_IS_FUNCTION) {
        ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
    }
    if (*flags & PyPARSE_UNICODE_LITERALS) {
        ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
    }
#endif

    for (;;) {
        char *a, *b;
        int type;
        size_t len;
        char *str;
        int col_offset;

        type = PyTokenizer_Get(tok, &a, &b);
        if (type == ERRORTOKEN) {
            err_ret->error = tok->done;
            break;
        }
        if (type == ENDMARKER && started) {
            type = NEWLINE; /* Add an extra newline */
            started = 0;
            /* Add the right number of dedent tokens,
               except if a certain flag is given --
               codeop.py uses this. */
            if (tok->indent &&
                !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
            {
                tok->pendin = -tok->indent;
                tok->indent = 0;
            }
        }
        else
            started = 1;

        /* Copy the token text [a, b) into a NUL-terminated heap string. */
        len = b - a; /* XXX this may compute NULL - NULL */
        str = (char *) PyObject_MALLOC(len + 1);
        if (str == NULL) {
            fprintf(stderr, "no mem for next token\n");
            err_ret->error = E_NOMEM;
            break;
        }
        if (len > 0)
            strncpy(str, a, len);
        str[len] = '\0';

        /* Column of the token within its line, or -1 when the token
           starts before the recorded line start. */
        if (a >= tok->line_start)
            col_offset = a - tok->line_start;
        else
            col_offset = -1;

        if ((err_ret->error =
             PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
                               &(err_ret->expected))) != E_OK) {
            /* str is released here only when the token was rejected;
               after E_OK or E_DONE this function does not free it. */
            if (err_ret->error != E_DONE) {
                PyObject_FREE(str);
                err_ret->token = type;
            }
            break;
        }
    }

    if (err_ret->error == E_DONE) {
        /* Take the tree out of the parser before the parser goes away. */
        n = ps->p_tree;
        ps->p_tree = NULL;
    }
    else
        n = NULL;

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    *flags = ps->p_flags;
#endif
    PyParser_Delete(ps);

    if (n == NULL) {
        if (tok->lineno <= 1 && tok->done == E_EOF)
            err_ret->error = E_EOF;
        err_ret->lineno = tok->lineno;
        if (tok->buf != NULL) {
            char *text = NULL;
            size_t len;
            assert(tok->cur - tok->buf < INT_MAX);
            err_ret->offset = (int)(tok->cur - tok->buf);
            len = tok->inp - tok->buf;
#ifdef Py_USING_UNICODE
            text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset);
#endif
            /* Fall back to a plain copy of the buffer contents. */
            if (text == NULL) {
                text = (char *) PyObject_MALLOC(len + 1);
                if (text != NULL) {
                    if (len > 0)
                        strncpy(text, tok->buf, len);
                    text[len] = '\0';
                }
            }
            err_ret->text = text;
        }
    } else if (tok->encoding != NULL) {
        /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
         * allocated using PyMem_
         */
        node* r = PyNode_New(encoding_decl);
        if (r)
            r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
        if (!r || !r->n_str) {
            err_ret->error = E_NOMEM;
            if (r)
                PyObject_FREE(r);
            /* The finished tree has not been attached to r and the caller
               only gets NULL back, so it must be released here. */
            PyNode_Free(n);
            n = NULL;
            goto done;
        }
        strcpy(r->n_str, tok->encoding);
        PyMem_FREE(tok->encoding);
        tok->encoding = NULL;
        r->n_nchildren = 1;
        r->n_child = n;
        n = r;
    }

done:
    PyTokenizer_Free(tok);

    return n;
}
/* Reset *err_ret to the "no error yet" state: E_OK, the given filename,
   line and offset 0, no text, and -1 for both the offending and the
   expected token. */
static void
initerr(perrdetail *err_ret, const char *filename)
{
    /* Status */
    err_ret->error = E_OK;
    err_ret->token = -1;
    err_ret->expected = -1;

    /* Location */
    err_ret->filename = filename;
    err_ret->lineno = 0;
    err_ret->offset = 0;

    /* Input text is filled in later, on failure only. */
    err_ret->text = NULL;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,70 @@
#ifndef Py_TOKENIZER_H
#define Py_TOKENIZER_H
#ifdef __cplusplus
extern "C" {
#endif
#include "object.h"
/* Tokenizer interface */
#include "token.h" /* For token types */
#define MAXINDENT 100 /* Max indentation level */
/* Tokenizer state: the input buffer and cursor, the indentation stacks,
   interactive prompts, tab-consistency checking and PEP 263 decoding
   state of one tokenizer instance. */
struct tok_state {
/* Input state; buf <= cur <= inp <= end */
/* NB an entire line is held in the buffer */
char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
char *cur; /* Next character in buffer */
char *inp; /* End of data in buffer */
char *end; /* End of input buffer if buf != NULL */
char *start; /* Start of current token if not NULL */
int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
/* NB If done != E_OK, cur must be == inp!!! */
FILE *fp; /* Rest of input; NULL if tokenizing a string */
int tabsize; /* Tab spacing */
int indent; /* Current indentation index */
int indstack[MAXINDENT]; /* Stack of indents */
int atbol; /* Nonzero if at begin of new line */
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
char *prompt, *nextprompt; /* For interactive prompting */
int lineno; /* Current line number */
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
const char *filename; /* For error messages */
int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */
int altindstack[MAXINDENT]; /* Stack of alternate indents */
/* Stuff for PEP 0263 */
int decoding_state; /* -1:decoding, 0:init, 1:raw */
int decoding_erred; /* whether erred in decoding */
int read_coding_spec; /* whether 'coding:...' has been read */
/* Encoding name, or NULL; parsetok.c copies it into the encoding_decl
   node and then releases it with PyMem_FREE. */
char *encoding;
int cont_line; /* whether we are in a continuation line. */
const char* line_start; /* pointer to start of current line */
#ifndef PGEN
PyObject *decoding_readline; /* codecs.open(...).readline */
PyObject *decoding_buffer;
#endif
/* NOTE(review): enc and str are undocumented here; check tokenizer.c for
   their meaning before relying on them. */
const char* enc;
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
};
/* Create a tokenizer reading from a string.  parsetok.c passes
   (start == file_input) as the int argument.  Returns NULL on failure. */
extern struct tok_state *PyTokenizer_FromString(const char *, int);
/* Create a tokenizer reading from a FILE; parsetok.c passes its ps1 and
   ps2 arguments as the two strings.  Returns NULL on failure. */
extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
/* Release a tokenizer created by one of the functions above. */
extern void PyTokenizer_Free(struct tok_state *);
/* Fetch the next token.  Returns its type (ERRORTOKEN on error) and
   stores the start and end of the token text through the two pointer
   arguments. */
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
#if defined(PGEN) || defined(Py_USING_UNICODE)
/* Used by parsetok.c to obtain the error text; a NULL result makes the
   caller fall back to copying the raw buffer.  *offset may be adjusted. */
extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
int len, int *offset);
#endif
#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKENIZER_H */