AppPkg/Applications/Python/Python-2.7.10: Initial Checkin part 2/5.

The Modules directory from the cPython 2.7.10 distribution.
These files are unchanged and set the baseline for subsequent commits.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Daryl McDaniel <edk2-lists@mc2research.org>


git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@18738 6f19259b-4bc3-4df7-8a09-765794883524
This commit is contained in:
Daryl McDaniel
2015-11-07 19:25:02 +00:00
committed by darylm503
parent c8042e1076
commit 7eb75bccb5
128 changed files with 146453 additions and 0 deletions

View File

@ -0,0 +1,246 @@
/* Bisection algorithms. Drop in replacement for bisect.py
Converted to C by Dmitry Vasiliev (dima at hlabs.spb.ru).
*/
#include "Python.h"
/* Binary search for the insertion point of item in list, placed after any
   entries that do not compare greater than item.

   list  -- any object supporting the sequence protocol
   item  -- the value being located
   lo    -- first index to consider; must not be negative
   hi    -- one past the last index to consider; -1 means "use len(list)"

   Returns the insertion index, or -1 with an exception set. */
static Py_ssize_t
internal_bisect_right(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t hi)
{
    if (lo < 0) {
        PyErr_SetString(PyExc_ValueError, "lo must be non-negative");
        return -1;
    }
    if (hi == -1) {
        hi = PySequence_Size(list);
        if (hi < 0)
            return -1;
    }
    while (lo < hi) {
        PyObject *probe;
        Py_ssize_t middle, is_less;

        /* Add and halve as unsigned values so that lo + hi cannot hit
           signed overflow (see issue 13496). */
        middle = ((size_t)lo + hi) / 2;
        probe = PySequence_GetItem(list, middle);
        if (probe == NULL)
            return -1;
        is_less = PyObject_RichCompareBool(item, probe, Py_LT);
        Py_DECREF(probe);
        if (is_less < 0)
            return -1;
        if (is_less == 0)
            lo = middle + 1;    /* item >= probe: answer lies to the right */
        else
            hi = middle;        /* item < probe: answer is at or left of middle */
    }
    return lo;
}
/* Python-level bisect_right(a, x[, lo[, hi]]) (also exported as bisect).
   Parses the arguments and returns the index computed by
   internal_bisect_right() as a Python int, or NULL on error. */
static PyObject *
bisect_right(PyObject *self, PyObject *args, PyObject *kw)
{
    static char *keywords[] = {"a", "x", "lo", "hi", NULL};
    PyObject *list, *item;
    Py_ssize_t lo = 0;
    Py_ssize_t hi = -1;     /* -1 asks the helper to use len(a) */
    Py_ssize_t where;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn:bisect_right",
                                     keywords, &list, &item, &lo, &hi))
        return NULL;

    where = internal_bisect_right(list, item, lo, hi);
    return (where < 0) ? NULL : PyInt_FromSsize_t(where);
}
PyDoc_STRVAR(bisect_right_doc,
"bisect(a, x[, lo[, hi]]) -> index\n\
bisect_right(a, x[, lo[, hi]]) -> index\n\
\n\
Return the index where to insert item x in list a, assuming a is sorted.\n\
\n\
The return value i is such that all e in a[:i] have e <= x, and all e in\n\
a[i:] have e > x. So if x already appears in the list, i points just\n\
beyond the rightmost x already there\n\
\n\
Optional args lo (default 0) and hi (default len(a)) bound the\n\
slice of a to be searched.\n");
/* Python-level insort_right(a, x[, lo[, hi]]) (also exported as insort).
   Finds the rightmost insertion point for x and inserts it there.  Exact
   lists are updated through PyList_Insert(); anything else is updated by
   calling its "insert" method.  Returns None, or NULL on error. */
static PyObject *
insort_right(PyObject *self, PyObject *args, PyObject *kw)
{
    static char *keywords[] = {"a", "x", "lo", "hi", NULL};
    PyObject *list, *item, *outcome;
    Py_ssize_t lo = 0;
    Py_ssize_t hi = -1;     /* -1 asks the helper to use len(a) */
    Py_ssize_t where;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn:insort_right",
                                     keywords, &list, &item, &lo, &hi))
        return NULL;

    where = internal_bisect_right(list, item, lo, hi);
    if (where < 0)
        return NULL;

    if (PyList_CheckExact(list)) {
        if (PyList_Insert(list, where, item) < 0)
            return NULL;
        Py_RETURN_NONE;
    }

    /* Subclasses and other sequences: go through their own insert(). */
    outcome = PyObject_CallMethod(list, "insert", "nO", where, item);
    if (outcome == NULL)
        return NULL;
    Py_DECREF(outcome);
    Py_RETURN_NONE;
}
PyDoc_STRVAR(insort_right_doc,
"insort(a, x[, lo[, hi]])\n\
insort_right(a, x[, lo[, hi]])\n\
\n\
Insert item x in list a, and keep it sorted assuming a is sorted.\n\
\n\
If x is already in a, insert it to the right of the rightmost x.\n\
\n\
Optional args lo (default 0) and hi (default len(a)) bound the\n\
slice of a to be searched.\n");
/* Binary search for the insertion point of item in list, placed before any
   entries that do not compare less than item.

   list  -- any object supporting the sequence protocol
   item  -- the value being located
   lo    -- first index to consider; must not be negative
   hi    -- one past the last index to consider; -1 means "use len(list)"

   Returns the insertion index, or -1 with an exception set. */
static Py_ssize_t
internal_bisect_left(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t hi)
{
    if (lo < 0) {
        PyErr_SetString(PyExc_ValueError, "lo must be non-negative");
        return -1;
    }
    if (hi == -1) {
        hi = PySequence_Size(list);
        if (hi < 0)
            return -1;
    }
    while (lo < hi) {
        PyObject *probe;
        Py_ssize_t middle, is_less;

        /* Add and halve as unsigned values so that lo + hi cannot hit
           signed overflow (see issue 13496). */
        middle = ((size_t)lo + hi) / 2;
        probe = PySequence_GetItem(list, middle);
        if (probe == NULL)
            return -1;
        is_less = PyObject_RichCompareBool(probe, item, Py_LT);
        Py_DECREF(probe);
        if (is_less < 0)
            return -1;
        if (is_less == 0)
            hi = middle;        /* probe >= item: answer is at or left of middle */
        else
            lo = middle + 1;    /* probe < item: answer lies to the right */
    }
    return lo;
}
/* Python-level bisect_left(a, x[, lo[, hi]]).  Parses the arguments and
   returns the index computed by internal_bisect_left() as a Python int,
   or NULL on error. */
static PyObject *
bisect_left(PyObject *self, PyObject *args, PyObject *kw)
{
    static char *keywords[] = {"a", "x", "lo", "hi", NULL};
    PyObject *list, *item;
    Py_ssize_t lo = 0;
    Py_ssize_t hi = -1;     /* -1 asks the helper to use len(a) */
    Py_ssize_t where;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn:bisect_left",
                                     keywords, &list, &item, &lo, &hi))
        return NULL;

    where = internal_bisect_left(list, item, lo, hi);
    return (where < 0) ? NULL : PyInt_FromSsize_t(where);
}
PyDoc_STRVAR(bisect_left_doc,
"bisect_left(a, x[, lo[, hi]]) -> index\n\
\n\
Return the index where to insert item x in list a, assuming a is sorted.\n\
\n\
The return value i is such that all e in a[:i] have e < x, and all e in\n\
a[i:] have e >= x. So if x already appears in the list, i points just\n\
before the leftmost x already there.\n\
\n\
Optional args lo (default 0) and hi (default len(a)) bound the\n\
slice of a to be searched.\n");
/* Python-level insort_left(a, x[, lo[, hi]]).  Finds the leftmost insertion
   point for x and inserts it there.  Exact lists are updated through
   PyList_Insert(); anything else is updated by calling its "insert"
   method.  Returns None, or NULL on error. */
static PyObject *
insort_left(PyObject *self, PyObject *args, PyObject *kw)
{
    static char *keywords[] = {"a", "x", "lo", "hi", NULL};
    PyObject *list, *item, *outcome;
    Py_ssize_t lo = 0;
    Py_ssize_t hi = -1;     /* -1 asks the helper to use len(a) */
    Py_ssize_t where;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn:insort_left",
                                     keywords, &list, &item, &lo, &hi))
        return NULL;

    where = internal_bisect_left(list, item, lo, hi);
    if (where < 0)
        return NULL;

    if (PyList_CheckExact(list)) {
        if (PyList_Insert(list, where, item) < 0)
            return NULL;
        Py_RETURN_NONE;
    }

    /* Subclasses and other sequences: go through their own insert(). */
    outcome = PyObject_CallMethod(list, "insert", "nO", where, item);
    if (outcome == NULL)
        return NULL;
    Py_DECREF(outcome);
    Py_RETURN_NONE;
}
PyDoc_STRVAR(insort_left_doc,
"insort_left(a, x[, lo[, hi]])\n\
\n\
Insert item x in list a, and keep it sorted assuming a is sorted.\n\
\n\
If x is already in a, insert it to the left of the leftmost x.\n\
\n\
Optional args lo (default 0) and hi (default len(a)) bound the\n\
slice of a to be searched.\n");
/* Method table for the _bisect module.  "bisect" and "insort" are aliases:
   they share the implementation and docstring of the *_right variants.
   Every entry accepts both positional and keyword arguments. */
static PyMethodDef bisect_methods[] = {
{"bisect_right", (PyCFunction)bisect_right,
METH_VARARGS|METH_KEYWORDS, bisect_right_doc},
{"bisect", (PyCFunction)bisect_right,
METH_VARARGS|METH_KEYWORDS, bisect_right_doc},
{"insort_right", (PyCFunction)insort_right,
METH_VARARGS|METH_KEYWORDS, insort_right_doc},
{"insort", (PyCFunction)insort_right,
METH_VARARGS|METH_KEYWORDS, insort_right_doc},
{"bisect_left", (PyCFunction)bisect_left,
METH_VARARGS|METH_KEYWORDS, bisect_left_doc},
{"insort_left", (PyCFunction)insort_left,
METH_VARARGS|METH_KEYWORDS, insort_left_doc},
{NULL, NULL} /* sentinel */
};
PyDoc_STRVAR(module_doc,
"Bisection algorithms.\n\
\n\
This module provides support for maintaining a list in sorted order without\n\
having to sort the list after each insertion. For long lists of items with\n\
expensive comparison operations, this can be an improvement over the more\n\
common approach.\n");
/* Module initialisation entry point: registers the _bisect module with its
   method table and docstring.  The module object itself is not needed
   afterwards, so the result is deliberately discarded. */
PyMODINIT_FUNC
init_bisect(void)
{
    (void)Py_InitModule3("_bisect", bisect_methods, module_doc);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,398 @@
#include "Python.h"
#include "structmember.h"
/* _functools module written and maintained
by Hye-Shik Chang <perky@FreeBSD.org>
with adaptations by Raymond Hettinger <python@rcn.com>
Copyright (c) 2004, 2005, 2006 Python Software Foundation.
All rights reserved.
*/
/* reduce() *************************************************************/
/* reduce(function, sequence[, initial]) -> value

   Folds the items of an iterable from left to right with a two-argument
   callable.  `args` is the argument tuple (func, seq[, initial]); after
   parsing, the variable is reused to hold the 2-tuple passed to func on
   each step.

   Returns a new reference to the accumulated value, or NULL with an
   exception set (TypeError for a non-iterable second argument or for an
   empty sequence with no initial value). */
static PyObject *
functools_reduce(PyObject *self, PyObject *args)
{
    PyObject *seq, *func, *result = NULL, *it;

    if (!PyArg_UnpackTuple(args, "reduce", 2, 3, &func, &seq, &result))
        return NULL;
    if (result != NULL)
        Py_INCREF(result);

    it = PyObject_GetIter(seq);
    if (it == NULL) {
        /* Only replace the message when the failure really is "not
           iterable".  Any other exception (MemoryError, an error raised
           inside __iter__, ...) must propagate unchanged instead of being
           masked by a TypeError. */
        if (PyErr_ExceptionMatches(PyExc_TypeError))
            PyErr_SetString(PyExc_TypeError,
                            "reduce() arg 2 must support iteration");
        Py_XDECREF(result);
        return NULL;
    }

    if ((args = PyTuple_New(2)) == NULL)
        goto Fail;

    for (;;) {
        PyObject *op2;

        /* If something kept a reference to the previous argument tuple we
           must not mutate it; drop ours and start with a fresh one. */
        if (args->ob_refcnt > 1) {
            Py_DECREF(args);
            if ((args = PyTuple_New(2)) == NULL)
                goto Fail;
        }

        op2 = PyIter_Next(it);
        if (op2 == NULL) {
            if (PyErr_Occurred())
                goto Fail;
            break;              /* iterator exhausted */
        }

        if (result == NULL)
            result = op2;       /* no initial value: first item seeds it */
        else {
            /* PyTuple_SetItem steals both references, so `result` is
               owned by the tuple until the call gives us a new value. */
            PyTuple_SetItem(args, 0, result);
            PyTuple_SetItem(args, 1, op2);
            if ((result = PyEval_CallObject(func, args)) == NULL)
                goto Fail;
        }
    }

    Py_DECREF(args);

    if (result == NULL)
        PyErr_SetString(PyExc_TypeError,
                        "reduce() of empty sequence with no initial value");

    Py_DECREF(it);
    return result;

Fail:
    Py_XDECREF(args);
    Py_XDECREF(result);
    Py_DECREF(it);
    return NULL;
}
PyDoc_STRVAR(reduce_doc,
"reduce(function, sequence[, initial]) -> value\n\
\n\
Apply a function of two arguments cumulatively to the items of a sequence,\n\
from left to right, so as to reduce the sequence to a single value.\n\
For example, reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) calculates\n\
((((1+2)+3)+4)+5). If initial is present, it is placed before the items\n\
of the sequence in the calculation, and serves as a default when the\n\
sequence is empty.");
/* partial object **********************************************************/
/* Instance layout of functools.partial objects. */
typedef struct {
PyObject_HEAD
PyObject *fn;   /* wrapped callable (first constructor argument) */
PyObject *args; /* tuple of positional arguments stored at construction */
PyObject *kw;   /* dict of stored keyword arguments; partial_call() also accepts Py_None here */
PyObject *dict; /* instance __dict__; NULL until first requested or assigned */
PyObject *weakreflist; /* List of weak references */
} partialobject;
static PyTypeObject partial_type;
/* tp_new for functools.partial: partial(func, *args, **keywords).

   Requires at least one positional argument, which must be callable.  The
   remaining positional arguments are stored as a tuple slice and the
   keyword arguments as a private dict copy (an empty dict when none were
   given).  Returns the new object, or NULL with an exception set. */
static PyObject *
partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
{
    PyObject *callee;
    partialobject *self_obj;

    if (PyTuple_GET_SIZE(args) < 1) {
        PyErr_SetString(PyExc_TypeError,
                        "type 'partial' takes at least one argument");
        return NULL;
    }

    callee = PyTuple_GET_ITEM(args, 0);
    if (!PyCallable_Check(callee)) {
        PyErr_SetString(PyExc_TypeError,
                        "the first argument must be callable");
        return NULL;
    }

    /* create partialobject structure */
    self_obj = (partialobject *)type->tp_alloc(type, 0);
    if (self_obj == NULL)
        return NULL;

    Py_INCREF(callee);
    self_obj->fn = callee;

    /* Everything after the callable becomes the stored positional args. */
    self_obj->args = PyTuple_GetSlice(args, 1, PY_SSIZE_T_MAX);
    if (self_obj->args == NULL) {
        self_obj->kw = NULL;
        Py_DECREF(self_obj);
        return NULL;
    }

    if (kw == NULL)
        self_obj->kw = PyDict_New();
    else
        self_obj->kw = PyDict_Copy(kw);
    if (self_obj->kw == NULL) {
        Py_DECREF(self_obj);
        return NULL;
    }

    self_obj->weakreflist = NULL;
    self_obj->dict = NULL;

    return (PyObject *)self_obj;
}
/* tp_dealloc for functools.partial: stops GC tracking, clears any weak
   references, releases the owned references and frees the object through
   the type's tp_free slot. */
static void
partial_dealloc(partialobject *pto)
{
    PyObject_GC_UnTrack(pto);

    if (pto->weakreflist != NULL) {
        PyObject_ClearWeakRefs((PyObject *) pto);
    }

    /* Any of these may still be NULL if construction failed part-way. */
    Py_XDECREF(pto->fn);
    Py_XDECREF(pto->args);
    Py_XDECREF(pto->kw);
    Py_XDECREF(pto->dict);

    Py_TYPE(pto)->tp_free(pto);
}
/* tp_call for functools.partial.

   Builds the effective positional arguments (stored args followed by the
   call's args) and keyword arguments (a copy of the stored keywords with
   the call's keywords merged over them), then invokes the wrapped
   callable.  Returns its result, or NULL with an exception set. */
static PyObject *
partial_call(partialobject *pto, PyObject *args, PyObject *kw)
{
    PyObject *outcome;
    PyObject *all_args = NULL, *all_kw = NULL;

    assert (PyCallable_Check(pto->fn));
    assert (PyTuple_Check(pto->args));
    assert (pto->kw == Py_None || PyDict_Check(pto->kw));

    /* Positional arguments: avoid a concatenation when either side is
       empty by reusing the other tuple directly. */
    if (PyTuple_GET_SIZE(pto->args) == 0) {
        all_args = args;
        Py_INCREF(all_args);
    }
    else if (PyTuple_GET_SIZE(args) == 0) {
        all_args = pto->args;
        Py_INCREF(all_args);
    }
    else {
        all_args = PySequence_Concat(pto->args, args);
        if (all_args == NULL)
            return NULL;
    }

    /* Keyword arguments: the call's keywords override the stored ones. */
    if (pto->kw == Py_None) {
        all_kw = kw;
        Py_XINCREF(all_kw);
    }
    else {
        all_kw = PyDict_Copy(pto->kw);
        if (all_kw == NULL) {
            Py_DECREF(all_args);
            return NULL;
        }
        if (kw != NULL && PyDict_Merge(all_kw, kw, 1) != 0) {
            Py_DECREF(all_args);
            Py_DECREF(all_kw);
            return NULL;
        }
    }

    outcome = PyObject_Call(pto->fn, all_args, all_kw);
    Py_DECREF(all_args);
    Py_XDECREF(all_kw);
    return outcome;
}
/* tp_traverse for functools.partial: reports every object reference held
   by the instance to the cyclic garbage collector.  Py_VISIT relies on the
   parameter names `visit` and `arg`. */
static int
partial_traverse(partialobject *pto, visitproc visit, void *arg)
{
Py_VISIT(pto->fn);
Py_VISIT(pto->args);
Py_VISIT(pto->kw);
Py_VISIT(pto->dict);
return 0;
}
PyDoc_STRVAR(partial_doc,
"partial(func, *args, **keywords) - new function with partial application\n\
of the given arguments and keywords.\n");
/* Byte offset of field x inside partialobject. */
#define OFF(x) offsetof(partialobject, x)
/* Read-only attributes exposing the stored callable, positional arguments
   and keyword arguments of a partial object. */
static PyMemberDef partial_memberlist[] = {
{"func", T_OBJECT, OFF(fn), READONLY,
"function object to use in future partial calls"},
{"args", T_OBJECT, OFF(args), READONLY,
"tuple of arguments to future partial calls"},
{"keywords", T_OBJECT, OFF(kw), READONLY,
"dictionary of keyword arguments to future partial calls"},
{NULL} /* Sentinel */
};
/* Getter for partial.__dict__.  The instance dictionary is created on
   first access.  Returns a new reference, or NULL if the dictionary could
   not be allocated. */
static PyObject *
partial_get_dict(partialobject *pto)
{
    PyObject *attrs = pto->dict;

    if (attrs == NULL) {
        attrs = PyDict_New();
        if (attrs == NULL)
            return NULL;
        pto->dict = attrs;
    }
    Py_INCREF(attrs);
    return attrs;
}
/* Setter for partial.__dict__.  Deleting the attribute (value == NULL) and
   assigning anything that is not a dict are both rejected with TypeError.
   Returns 0 on success and -1 with an exception set on failure. */
static int
partial_set_dict(partialobject *pto, PyObject *value)
{
    PyObject *previous;

    if (value == NULL) {
        /* It is illegal to del p.__dict__ */
        PyErr_SetString(PyExc_TypeError,
                        "a partial object's dictionary may not be deleted");
        return -1;
    }
    else if (!PyDict_Check(value)) {
        /* Can only set __dict__ to a dictionary */
        PyErr_SetString(PyExc_TypeError,
                        "setting partial object's dictionary to a non-dict");
        return -1;
    }

    /* Install the new dict before releasing the old one so the field never
       points at a freed object. */
    previous = pto->dict;
    Py_INCREF(value);
    pto->dict = value;
    Py_XDECREF(previous);
    return 0;
}
/* Computed attributes of partial objects: __dict__ is served by
   partial_get_dict() / partial_set_dict(). */
static PyGetSetDef partial_getsetlist[] = {
{"__dict__", (getter)partial_get_dict, (setter)partial_set_dict},
{NULL} /* Sentinel */
};
/* Pickle strategy:
__reduce__ by itself doesn't support getting kwargs in the unpickle
operation so we define a __setstate__ that replaces all the information
about the partial. If we only replaced part of it someone would use
it as a hook to do strange things.
*/
/* __reduce__ for functools.partial.  Returns the tuple
   (type, (fn,), (fn, args, kw, dict-or-None)): the object is rebuilt by
   calling type(fn) and then restored in full by __setstate__. */
PyObject *
partial_reduce(partialobject *pto, PyObject *unused)
{
    PyObject *attrs = pto->dict;

    /* An instance dict that was never created is pickled as None. */
    if (attrs == NULL)
        attrs = Py_None;

    return Py_BuildValue("O(O)(OOOO)", Py_TYPE(pto), pto->fn, pto->fn,
                         pto->args, pto->kw, attrs);
}
/* __setstate__ for functools.partial.

   `state` must be the 4-tuple (fn, args, kw, dict) produced by
   partial_reduce(): fn callable, args a tuple, kw a dict or None, and dict
   a dict or None.  The state is validated before anything is stored,
   because partial_call() assumes these types and would otherwise crash on
   a corrupt or hand-crafted pickle.

   Returns None, or NULL with an exception set (TypeError for a state of
   the right length but the wrong types). */
PyObject *
partial_setstate(partialobject *pto, PyObject *state)
{
    PyObject *fn, *fnargs, *kw, *dict;
    PyObject *old_fn, *old_args, *old_kw, *old_dict;

    if (!PyArg_ParseTuple(state, "OOOO",
                          &fn, &fnargs, &kw, &dict))
        return NULL;

    if (!PyCallable_Check(fn) ||
        !PyTuple_Check(fnargs) ||
        (kw != Py_None && !PyDict_Check(kw)) ||
        (dict != Py_None && !PyDict_Check(dict)))
    {
        PyErr_SetString(PyExc_TypeError, "invalid partial state");
        return NULL;
    }

    /* Take our references first ... */
    Py_INCREF(fn);
    Py_INCREF(fnargs);
    Py_INCREF(kw);
    if (dict == Py_None)
        dict = NULL;            /* "no instance dict" is stored as NULL */
    else
        Py_INCREF(dict);

    /* ... then swap the fields, and only release the old values once the
       object is fully consistent again.  Releasing them can run arbitrary
       code (finalizers) that may look at this object. */
    old_fn = pto->fn;
    old_args = pto->args;
    old_kw = pto->kw;
    old_dict = pto->dict;

    pto->fn = fn;
    pto->args = fnargs;
    pto->kw = kw;
    pto->dict = dict;

    Py_XDECREF(old_fn);
    Py_XDECREF(old_args);
    Py_XDECREF(old_kw);
    Py_XDECREF(old_dict);

    Py_RETURN_NONE;
}
/* Methods of partial objects; both entries implement pickling support. */
static PyMethodDef partial_methods[] = {
{"__reduce__", (PyCFunction)partial_reduce, METH_NOARGS},
{"__setstate__", (PyCFunction)partial_setstate, METH_O},
{NULL, NULL} /* sentinel */
};
/* Type object for functools.partial.  Per tp_flags the type participates
   in cyclic GC, may be subclassed, and supports weak references; instances
   carry a __dict__ (tp_dictoffset) and are callable (tp_call). */
static PyTypeObject partial_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"functools.partial", /* tp_name */
sizeof(partialobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)partial_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
(ternaryfunc)partial_call, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
PyObject_GenericSetAttr, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
partial_doc, /* tp_doc */
(traverseproc)partial_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
offsetof(partialobject, weakreflist), /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
partial_methods, /* tp_methods */
partial_memberlist, /* tp_members */
partial_getsetlist, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
offsetof(partialobject, dict), /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
partial_new, /* tp_new */
PyObject_GC_Del, /* tp_free */
};
/* module level code ********************************************************/
PyDoc_STRVAR(module_doc,
"Tools that operate on functions.");
/* Module-level functions of _functools: only reduce(); the partial type is
   added to the module separately in init_functools(). */
static PyMethodDef module_methods[] = {
{"reduce", functools_reduce, METH_VARARGS, reduce_doc},
{NULL, NULL} /* sentinel */
};
/* Module initialisation entry point for _functools.  Creates the module,
   then finalises each listed type and publishes it under the part of its
   tp_name that follows the '.', e.g. "functools.partial" -> "partial". */
PyMODINIT_FUNC
init_functools(void)
{
    PyObject *m;
    PyTypeObject **cursor;
    PyTypeObject *typelist[] = {
        &partial_type,
        NULL
    };

    m = Py_InitModule3("_functools", module_methods, module_doc);
    if (m == NULL)
        return;

    for (cursor = typelist; *cursor != NULL; cursor++) {
        PyTypeObject *tp = *cursor;
        char *dot;

        if (PyType_Ready(tp) < 0)
            return;
        dot = strchr(tp->tp_name, '.');
        assert (dot != NULL);
        /* PyModule_AddObject takes over one reference to the type. */
        Py_INCREF(tp);
        PyModule_AddObject(m, dot+1, (PyObject *)tp);
    }
}

View File

@ -0,0 +1,696 @@
/* Drop in replacement for heapq.py
C implementation derived directly from heapq.py in Py2.3
which was written by Kevin O'Connor, augmented by Tim Peters,
annotated by François Pinard, and converted to C by Raymond Hettinger.
*/
#include "Python.h"
/* Older implementations of heapq used Py_LE for comparisons. Now, it uses
Py_LT so it will match min(), sorted(), and bisect(). Unfortunately, some
client code (Twisted for example) relied on Py_LE, so this little function
restores compatibility by trying both.
*/
/* Ordering test used throughout the module: is x < y?

   When x has an __lt__ attribute the answer comes from x < y directly.
   Otherwise it is derived as not (y <= x), which keeps objects that only
   implement __le__ working.  Returns 1 (true), 0 (false) or -1 with an
   exception set. */
static int
cmp_lt(PyObject *x, PyObject *y)
{
    static PyObject *lt = NULL;     /* cached "__lt__" string object */
    int le_result;

    if (lt == NULL) {
        lt = PyString_FromString("__lt__");
        if (lt == NULL)
            return -1;
    }

    if (PyObject_HasAttr(x, lt))
        return PyObject_RichCompareBool(x, y, Py_LT);

    le_result = PyObject_RichCompareBool(y, x, Py_LE);
    return (le_result == -1) ? -1 : 1 - le_result;
}
/* Restore the min-heap invariant for the item at index pos by moving it
   towards the root (never above startpos) while it compares less than its
   parent.

   Returns 0 on success and -1 with an exception set: IndexError if pos is
   past the end of the list, RuntimeError if a comparison changed the
   list's size, or whatever the comparison itself raised. */
static int
_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
{
    PyObject *newitem, *parent;
    Py_ssize_t parentpos, size;
    int cmp;

    assert(PyList_Check(heap));
    size = PyList_GET_SIZE(heap);
    if (pos >= size) {
        PyErr_SetString(PyExc_IndexError, "index out of range");
        return -1;
    }

    /* Follow the path to the root, moving parents down until finding
       a place newitem fits. */
    newitem = PyList_GET_ITEM(heap, pos);
    while (pos > startpos) {
        parentpos = (pos - 1) >> 1;
        parent = PyList_GET_ITEM(heap, parentpos);

        /* The comparison can run arbitrary Python code that removes these
           items from the list.  Hold strong references so both operands
           stay alive for the duration of the call. */
        Py_INCREF(newitem);
        Py_INCREF(parent);
        cmp = cmp_lt(newitem, parent);
        Py_DECREF(parent);
        Py_DECREF(newitem);
        if (cmp == -1)
            return -1;
        if (size != PyList_GET_SIZE(heap)) {
            PyErr_SetString(PyExc_RuntimeError,
                            "list changed size during iteration");
            return -1;
        }
        if (cmp == 0)
            break;

        /* Re-read both slots: the comparison may have replaced them.
           Swapping pointers needs no refcount changes. */
        parent = PyList_GET_ITEM(heap, parentpos);
        newitem = PyList_GET_ITEM(heap, pos);
        PyList_SET_ITEM(heap, parentpos, newitem);
        PyList_SET_ITEM(heap, pos, parent);
        pos = parentpos;
    }
    return 0;
}
/* Restore the min-heap invariant below index pos: repeatedly promote the
   smaller child into the current slot until a leaf is reached, then let
   _siftdown() move the displaced item back up to its proper place.

   Returns 0 on success and -1 with an exception set: IndexError if pos is
   past the end of the list, RuntimeError if a comparison changed the
   list's size, or whatever the comparison itself raised. */
static int
_siftup(PyListObject *heap, Py_ssize_t pos)
{
    Py_ssize_t startpos, endpos, childpos, rightpos, limit;
    PyObject *tmp1, *tmp2;
    int cmp;

    assert(PyList_Check(heap));
    endpos = PyList_GET_SIZE(heap);
    startpos = pos;
    if (pos >= endpos) {
        PyErr_SetString(PyExc_IndexError, "index out of range");
        return -1;
    }

    /* Bubble up the smaller child until hitting a leaf. */
    limit = endpos / 2;          /* smallest pos that has no child */
    while (pos < limit) {
        /* Set childpos to index of smaller child. */
        childpos = 2*pos + 1;    /* leftmost child position */
        rightpos = childpos + 1;
        if (rightpos < endpos) {
            PyObject *left = PyList_GET_ITEM(heap, childpos);
            PyObject *right = PyList_GET_ITEM(heap, rightpos);

            /* The comparison can run arbitrary Python code that removes
               these items from the list.  Hold strong references so both
               operands stay alive for the duration of the call. */
            Py_INCREF(left);
            Py_INCREF(right);
            cmp = cmp_lt(left, right);
            Py_DECREF(left);
            Py_DECREF(right);
            if (cmp == -1)
                return -1;
            if (cmp == 0)
                childpos = rightpos;
            if (endpos != PyList_GET_SIZE(heap)) {
                PyErr_SetString(PyExc_RuntimeError,
                                "list changed size during iteration");
                return -1;
            }
        }
        /* Move the smaller child up.  Swapping pointers needs no refcount
           changes. */
        tmp1 = PyList_GET_ITEM(heap, childpos);
        tmp2 = PyList_GET_ITEM(heap, pos);
        PyList_SET_ITEM(heap, childpos, tmp2);
        PyList_SET_ITEM(heap, pos, tmp1);
        pos = childpos;
    }
    /* Bubble it up to its final resting place (by sifting its parents down). */
    return _siftdown(heap, startpos, pos);
}
/* heappush(heap, item) -> None.  Appends item to the list and sifts it
   towards the root to restore the heap invariant.  Raises TypeError when
   heap is not a list.  Returns NULL with an exception set on failure. */
static PyObject *
heappush(PyObject *self, PyObject *args)
{
    PyObject *heap, *item;
    Py_ssize_t last;

    if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
        return NULL;

    if (!PyList_Check(heap)) {
        PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
        return NULL;
    }

    if (PyList_Append(heap, item) == -1)
        return NULL;

    /* The new item sits in the last slot; move it up as far as needed. */
    last = PyList_GET_SIZE(heap) - 1;
    if (_siftdown((PyListObject *)heap, 0, last) == -1)
        return NULL;

    Py_RETURN_NONE;
}
PyDoc_STRVAR(heappush_doc,
"heappush(heap, item) -> None. Push item onto heap, maintaining the heap invariant.");
/* heappop(heap) -> smallest item.

   Removes the last list element, puts it in the root slot in place of the
   smallest item, and sifts it into position.  Raises TypeError when heap
   is not a list and IndexError when it is empty.  Returns a new reference
   to the removed item, or NULL with an exception set. */
static PyObject *
heappop(PyObject *self, PyObject *heap)
{
    PyObject *lastelt, *returnitem;
    Py_ssize_t n;

    if (!PyList_Check(heap)) {
        PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
        return NULL;
    }

    /* # raises appropriate IndexError if heap is empty */
    n = PyList_GET_SIZE(heap);
    if (n == 0) {
        PyErr_SetString(PyExc_IndexError, "index out of range");
        return NULL;
    }

    lastelt = PyList_GET_ITEM(heap, n-1);
    Py_INCREF(lastelt);
    /* Shrinking the list can fail; do not carry on with a pending
       exception and a size that no longer matches n. */
    if (PyList_SetSlice(heap, n-1, n, NULL) < 0) {
        Py_DECREF(lastelt);
        return NULL;
    }
    n--;

    if (!n)
        return lastelt;     /* the list held a single item */

    /* Overwrite the root without a DECREF: its reference now belongs to
       returnitem, and our reference to lastelt moves into the list. */
    returnitem = PyList_GET_ITEM(heap, 0);
    PyList_SET_ITEM(heap, 0, lastelt);
    if (_siftup((PyListObject *)heap, 0) == -1) {
        Py_DECREF(returnitem);
        return NULL;
    }
    return returnitem;
}
PyDoc_STRVAR(heappop_doc,
"Pop the smallest item off the heap, maintaining the heap invariant.");
/* heapreplace(heap, item) -> value.  Swaps item into the root slot, sifts
   it into position and returns the previous root.  Raises TypeError when
   heap is not a list and IndexError when it is empty.  Returns NULL with
   an exception set on failure. */
static PyObject *
heapreplace(PyObject *self, PyObject *args)
{
    PyObject *heap, *item, *old_root;

    if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
        return NULL;

    if (!PyList_Check(heap)) {
        PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
        return NULL;
    }

    if (PyList_GET_SIZE(heap) < 1) {
        PyErr_SetString(PyExc_IndexError, "index out of range");
        return NULL;
    }

    /* The list's reference to the old root is handed to the caller, so the
       slot is overwritten without a DECREF; the list gets its own
       reference to item. */
    old_root = PyList_GET_ITEM(heap, 0);
    Py_INCREF(item);
    PyList_SET_ITEM(heap, 0, item);

    if (_siftup((PyListObject *)heap, 0) == -1) {
        Py_DECREF(old_root);
        return NULL;
    }
    return old_root;
}
PyDoc_STRVAR(heapreplace_doc,
"heapreplace(heap, item) -> value. Pop and return the current smallest value, and add the new item.\n\
\n\
This is more efficient than heappop() followed by heappush(), and can be\n\
more appropriate when using a fixed-size heap. Note that the value\n\
returned may be larger than item! That constrains reasonable uses of\n\
this routine unless written as part of a conditional replacement:\n\n\
if item > heap[0]:\n\
item = heapreplace(heap, item)\n");
/* heappushpop(heap, item) -> value.

   Equivalent to pushing item and then popping the smallest element, done
   in one step: when the heap is empty or its root is not less than item,
   item itself is returned and the heap is left untouched; otherwise item
   replaces the root, is sifted into position, and the old root is
   returned.

   Raises TypeError when heap is not a list and IndexError when the
   comparison emptied the heap.  Returns NULL with an exception set on
   failure. */
static PyObject *
heappushpop(PyObject *self, PyObject *args)
{
    PyObject *heap, *item, *returnitem, *top;
    int cmp;

    if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
        return NULL;

    if (!PyList_Check(heap)) {
        PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
        return NULL;
    }

    if (PyList_GET_SIZE(heap) < 1) {
        Py_INCREF(item);
        return item;
    }

    /* The comparison can run arbitrary Python code that mutates the heap;
       keep the root alive while it is being compared. */
    top = PyList_GET_ITEM(heap, 0);
    Py_INCREF(top);
    cmp = cmp_lt(top, item);
    Py_DECREF(top);
    if (cmp == -1)
        return NULL;
    if (cmp == 0) {
        Py_INCREF(item);
        return item;
    }

    /* The comparison may have emptied the list; slot 0 must exist before
       it is read and overwritten below. */
    if (PyList_GET_SIZE(heap) == 0) {
        PyErr_SetString(PyExc_IndexError, "index out of range");
        return NULL;
    }

    returnitem = PyList_GET_ITEM(heap, 0);
    Py_INCREF(item);
    PyList_SET_ITEM(heap, 0, item);
    if (_siftup((PyListObject *)heap, 0) == -1) {
        Py_DECREF(returnitem);
        return NULL;
    }
    return returnitem;
}
PyDoc_STRVAR(heappushpop_doc,
"heappushpop(heap, item) -> value. Push item on the heap, then pop and return the smallest item\n\
from the heap. The combined action runs more efficiently than\n\
heappush() followed by a separate call to heappop().");
/* heapify(heap) -> None.  Rearranges a list into heap order in place.
   Raises TypeError when heap is not a list.  Returns NULL with an
   exception set on failure. */
static PyObject *
heapify(PyObject *self, PyObject *heap)
{
    Py_ssize_t idx, count;

    if (!PyList_Check(heap)) {
        PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
        return NULL;
    }

    count = PyList_GET_SIZE(heap);

    /* Work bottom-up.  Only indices that have at least one child need to
       be visited, i.e. those with 2*idx + 1 < count.  For even count = 2*j
       the largest such index is j-1, and for odd count = 2*j+1 it is also
       j-1; in both cases that equals count/2 - 1. */
    for (idx = count/2 - 1; idx >= 0; idx--) {
        if (_siftup((PyListObject *)heap, idx) == -1)
            return NULL;
    }

    Py_RETURN_NONE;
}
PyDoc_STRVAR(heapify_doc,
"Transform list into a heap, in-place, in O(len(heap)) time.");
/* nlargest(n, iterable) -> list of the n largest items, largest first.

   Keeps a private min-heap of the n largest items seen so far: the first n
   items are collected and heapified, and every later item that is greater
   than the heap's root replaces it.  The heap is finally sorted and
   reversed.  Returns a new list, or NULL with an exception set. */
static PyObject *
nlargest(PyObject *self, PyObject *args)
{
    PyObject *heap = NULL, *candidate, *iterable, *smallest, *it, *evicted;
    Py_ssize_t idx, n;
    int cmp, status;

    if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
        return NULL;

    it = PyObject_GetIter(iterable);
    if (it == NULL)
        return NULL;

    heap = PyList_New(0);
    if (heap == NULL)
        goto fail;

    /* Phase 1: take up to n items as they come. */
    for (idx = 0; idx < n; idx++) {
        candidate = PyIter_Next(it);
        if (candidate == NULL) {
            if (PyErr_Occurred())
                goto fail;
            goto sortit;        /* fewer than n items in total */
        }
        status = PyList_Append(heap, candidate);
        Py_DECREF(candidate);
        if (status == -1)
            goto fail;
    }
    if (PyList_GET_SIZE(heap) == 0)
        goto sortit;

    /* Phase 2: turn the collected items into a min-heap. */
    for (idx = n/2 - 1; idx >= 0; idx--) {
        if (_siftup((PyListObject *)heap, idx) == -1)
            goto fail;
    }

    /* Phase 3: every remaining item that beats the current smallest of the
       kept items replaces it. */
    smallest = PyList_GET_ITEM(heap, 0);
    for (;;) {
        candidate = PyIter_Next(it);
        if (candidate == NULL) {
            if (PyErr_Occurred())
                goto fail;
            goto sortit;
        }
        cmp = cmp_lt(smallest, candidate);
        if (cmp == -1 || cmp == 0) {
            Py_DECREF(candidate);
            if (cmp == -1)
                goto fail;
            continue;           /* not larger than what we already keep */
        }
        /* The list takes over our reference to candidate. */
        evicted = PyList_GET_ITEM(heap, 0);
        PyList_SET_ITEM(heap, 0, candidate);
        Py_DECREF(evicted);
        if (_siftup((PyListObject *)heap, 0) == -1)
            goto fail;
        smallest = PyList_GET_ITEM(heap, 0);
    }

sortit:
    if (PyList_Sort(heap) == -1 || PyList_Reverse(heap) == -1)
        goto fail;
    Py_DECREF(it);
    return heap;

fail:
    Py_DECREF(it);
    Py_XDECREF(heap);
    return NULL;
}
PyDoc_STRVAR(nlargest_doc,
"Find the n largest elements in a dataset.\n\
\n\
Equivalent to: sorted(iterable, reverse=True)[:n]\n");
/* Max-heap counterpart of _siftdown(): moves the item at index pos towards
   the root (never above startpos) while its parent compares less than it.
   Unlike _siftdown(), the item being placed is held with its own strong
   reference and list slots are rewritten with explicit INCREF/DECREF
   pairs.  Returns 0 on success, or -1 with an exception set (IndexError
   when pos is past the end of the list, or whatever the comparison
   raised). */
static int
_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
{
PyObject *newitem, *parent;
int cmp;
Py_ssize_t parentpos;
assert(PyList_Check(heap));
if (pos >= PyList_GET_SIZE(heap)) {
PyErr_SetString(PyExc_IndexError, "index out of range");
return -1;
}
newitem = PyList_GET_ITEM(heap, pos);
/* Own newitem for the whole walk; slot pos gets overwritten below. */
Py_INCREF(newitem);
/* Follow the path to the root, moving parents down until finding
a place newitem fits. */
while (pos > startpos){
parentpos = (pos - 1) >> 1;
parent = PyList_GET_ITEM(heap, parentpos);
cmp = cmp_lt(parent, newitem);
if (cmp == -1) {
Py_DECREF(newitem);
return -1;
}
if (cmp == 0)
break;
/* Copy the parent down one level: the slot gains a reference to parent
and gives up the one it held before. */
Py_INCREF(parent);
Py_DECREF(PyList_GET_ITEM(heap, pos));
PyList_SET_ITEM(heap, pos, parent);
pos = parentpos;
}
/* Store newitem in its final slot; the list takes over our reference. */
Py_DECREF(PyList_GET_ITEM(heap, pos));
PyList_SET_ITEM(heap, pos, newitem);
return 0;
}
/* Max-heap counterpart of _siftup(): repeatedly promotes the larger child
   into the current slot until a leaf is reached, stores the displaced item
   there, and lets _siftdownmax() move it back up to its proper place.
   Returns 0 on success, or -1 with an exception set (IndexError when pos
   is past the end of the list, or whatever the comparison raised). */
static int
_siftupmax(PyListObject *heap, Py_ssize_t pos)
{
Py_ssize_t startpos, endpos, childpos, rightpos, limit;
int cmp;
PyObject *newitem, *tmp;
assert(PyList_Check(heap));
endpos = PyList_GET_SIZE(heap);
startpos = pos;
if (pos >= endpos) {
PyErr_SetString(PyExc_IndexError, "index out of range");
return -1;
}
newitem = PyList_GET_ITEM(heap, pos);
/* Own newitem for the whole walk; slot pos gets overwritten below. */
Py_INCREF(newitem);
/* Bubble up the larger child until hitting a leaf. */
limit = endpos / 2; /* smallest pos that has no child */
while (pos < limit) {
/* Set childpos to index of larger child. */
childpos = 2*pos + 1; /* leftmost child position */
rightpos = childpos + 1;
if (rightpos < endpos) {
cmp = cmp_lt(
PyList_GET_ITEM(heap, rightpos),
PyList_GET_ITEM(heap, childpos));
if (cmp == -1) {
Py_DECREF(newitem);
return -1;
}
/* right is not less than left: prefer the right child. */
if (cmp == 0)
childpos = rightpos;
}
/* Move the larger child up. */
tmp = PyList_GET_ITEM(heap, childpos);
Py_INCREF(tmp);
Py_DECREF(PyList_GET_ITEM(heap, pos));
PyList_SET_ITEM(heap, pos, tmp);
pos = childpos;
}
/* The leaf at pos is empty now. Put newitem there, and bubble
it up to its final resting place (by sifting its parents down). */
Py_DECREF(PyList_GET_ITEM(heap, pos));
PyList_SET_ITEM(heap, pos, newitem);
return _siftdownmax(heap, startpos, pos);
}
/* nsmallest(n, iterable) -> list of the n smallest items, smallest first.

   Keeps a private max-heap of the n smallest items seen so far: the first
   n items are collected and heapified, and every later item that is less
   than the heap's root replaces it.  The heap is finally sorted.  Returns
   a new list, or NULL with an exception set. */
static PyObject *
nsmallest(PyObject *self, PyObject *args)
{
    PyObject *heap = NULL, *candidate, *iterable, *largest, *it, *evicted;
    Py_ssize_t idx, n;
    int cmp, status;

    if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
        return NULL;

    it = PyObject_GetIter(iterable);
    if (it == NULL)
        return NULL;

    heap = PyList_New(0);
    if (heap == NULL)
        goto fail;

    /* Phase 1: take up to n items as they come. */
    for (idx = 0; idx < n; idx++) {
        candidate = PyIter_Next(it);
        if (candidate == NULL) {
            if (PyErr_Occurred())
                goto fail;
            goto sortit;        /* fewer than n items in total */
        }
        status = PyList_Append(heap, candidate);
        Py_DECREF(candidate);
        if (status == -1)
            goto fail;
    }
    n = PyList_GET_SIZE(heap);
    if (n == 0)
        goto sortit;

    /* Phase 2: turn the collected items into a max-heap. */
    for (idx = n/2 - 1; idx >= 0; idx--) {
        if (_siftupmax((PyListObject *)heap, idx) == -1)
            goto fail;
    }

    /* Phase 3: every remaining item that is below the current largest of
       the kept items replaces it. */
    largest = PyList_GET_ITEM(heap, 0);
    for (;;) {
        candidate = PyIter_Next(it);
        if (candidate == NULL) {
            if (PyErr_Occurred())
                goto fail;
            goto sortit;
        }
        cmp = cmp_lt(candidate, largest);
        if (cmp == -1 || cmp == 0) {
            Py_DECREF(candidate);
            if (cmp == -1)
                goto fail;
            continue;           /* not smaller than what we already keep */
        }
        /* The list takes over our reference to candidate. */
        evicted = PyList_GET_ITEM(heap, 0);
        PyList_SET_ITEM(heap, 0, candidate);
        Py_DECREF(evicted);
        if (_siftupmax((PyListObject *)heap, 0) == -1)
            goto fail;
        largest = PyList_GET_ITEM(heap, 0);
    }

sortit:
    if (PyList_Sort(heap) == -1)
        goto fail;
    Py_DECREF(it);
    return heap;

fail:
    Py_DECREF(it);
    Py_XDECREF(heap);
    return NULL;
}
PyDoc_STRVAR(nsmallest_doc,
"Find the n smallest elements in a dataset.\n\
\n\
Equivalent to: sorted(iterable)[:n]\n");
/* Method table for the _heapq module.  heappop and heapify receive the heap
   directly (METH_O); the others unpack an argument tuple (METH_VARARGS). */
static PyMethodDef heapq_methods[] = {
{"heappush", (PyCFunction)heappush,
METH_VARARGS, heappush_doc},
{"heappushpop", (PyCFunction)heappushpop,
METH_VARARGS, heappushpop_doc},
{"heappop", (PyCFunction)heappop,
METH_O, heappop_doc},
{"heapreplace", (PyCFunction)heapreplace,
METH_VARARGS, heapreplace_doc},
{"heapify", (PyCFunction)heapify,
METH_O, heapify_doc},
{"nlargest", (PyCFunction)nlargest,
METH_VARARGS, nlargest_doc},
{"nsmallest", (PyCFunction)nsmallest,
METH_VARARGS, nsmallest_doc},
{NULL, NULL} /* sentinel */
};
PyDoc_STRVAR(module_doc,
"Heap queue algorithm (a.k.a. priority queue).\n\
\n\
Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
all k, counting elements from 0. For the sake of comparison,\n\
non-existing elements are considered to be infinite. The interesting\n\
property of a heap is that a[0] is always its smallest element.\n\
\n\
Usage:\n\
\n\
heap = [] # creates an empty heap\n\
heappush(heap, item) # pushes a new item on the heap\n\
item = heappop(heap) # pops the smallest item from the heap\n\
item = heap[0] # smallest item on the heap without popping it\n\
heapify(x) # transforms list into a heap, in-place, in linear time\n\
item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
# new item; the heap size is unchanged\n\
\n\
Our API differs from textbook heap algorithms as follows:\n\
\n\
- We use 0-based indexing. This makes the relationship between the\n\
index for a node and the indexes for its children slightly less\n\
obvious, but is more suitable since Python uses 0-based indexing.\n\
\n\
- Our heappop() method returns the smallest item, not the largest.\n\
\n\
These two make it possible to view the heap as a regular Python list\n\
without surprises: heap[0] is the smallest item, and heap.sort()\n\
maintains the heap invariant!\n");
PyDoc_STRVAR(__about__,
"Heap queues\n\
\n\
[explanation by Fran<61>ois Pinard]\n\
\n\
Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
all k, counting elements from 0. For the sake of comparison,\n\
non-existing elements are considered to be infinite. The interesting\n\
property of a heap is that a[0] is always its smallest element.\n"
"\n\
The strange invariant above is meant to be an efficient memory\n\
representation for a tournament. The numbers below are `k', not a[k]:\n\
\n\
0\n\
\n\
1 2\n\
\n\
3 4 5 6\n\
\n\
7 8 9 10 11 12 13 14\n\
\n\
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
\n\
\n\
In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
an usual binary tournament we see in sports, each cell is the winner\n\
over the two cells it tops, and we can trace the winner down the tree\n\
to see all opponents s/he had. However, in many computer applications\n\
of such tournaments, we do not need to trace the history of a winner.\n\
To be more memory efficient, when a winner is promoted, we try to\n\
replace it by something else at a lower level, and the rule becomes\n\
that a cell and the two cells it tops contain three different items,\n\
but the top cell \"wins\" over the two topped cells.\n"
"\n\
If this heap invariant is protected at all time, index 0 is clearly\n\
the overall winner. The simplest algorithmic way to remove it and\n\
find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
diagram above) into the 0 position, and then percolate this new 0 down\n\
the tree, exchanging values, until the invariant is re-established.\n\
This is clearly logarithmic on the total number of items in the tree.\n\
By iterating over all items, you get an O(n ln n) sort.\n"
"\n\
A nice feature of this sort is that you can efficiently insert new\n\
items while the sort is going on, provided that the inserted items are\n\
not \"better\" than the last 0'th element you extracted. This is\n\
especially useful in simulation contexts, where the tree holds all\n\
incoming events, and the \"win\" condition means the smallest scheduled\n\
time. When an event schedule other events for execution, they are\n\
scheduled into the future, so they can easily go into the heap. So, a\n\
heap is a good structure for implementing schedulers (this is what I\n\
used for my MIDI sequencer :-).\n"
"\n\
Various structures for implementing schedulers have been extensively\n\
studied, and heaps are good for this, as they are reasonably speedy,\n\
the speed is almost constant, and the worst case is not much different\n\
than the average case. However, there are other representations which\n\
are more efficient overall, yet the worst cases might be terrible.\n"
"\n\
Heaps are also very useful in big disk sorts. You most probably all\n\
know that a big sort implies producing \"runs\" (which are pre-sorted\n\
sequences, which size is usually related to the amount of CPU memory),\n\
followed by a merging passes for these runs, which merging is often\n\
very cleverly organised[1]. It is very important that the initial\n\
sort produces the longest runs possible. Tournaments are a good way\n\
to that. If, using all the memory available to hold a tournament, you\n\
replace and percolate items that happen to fit the current run, you'll\n\
produce runs which are twice the size of the memory for random input,\n\
and much better for input fuzzily ordered.\n"
"\n\
Moreover, if you output the 0'th item on disk and get an input which\n\
may not fit in the current tournament (because the value \"wins\" over\n\
the last output value), it cannot fit in the heap, so the size of the\n\
heap decreases. The freed memory could be cleverly reused immediately\n\
for progressively building a second heap, which grows at exactly the\n\
same rate the first heap is melting. When the first heap completely\n\
vanishes, you switch heaps and start a new run. Clever and quite\n\
effective!\n\
\n\
In a word, heaps are useful memory structures to know. I use them in\n\
a few applications, and I think it is good to keep a `heap' module\n\
around. :-)\n"
"\n\
--------------------\n\
[1] The disk balancing algorithms which are current, nowadays, are\n\
more annoying than clever, and this is a consequence of the seeking\n\
capabilities of the disks. On devices which cannot seek, like big\n\
tape drives, the story was quite different, and one had to be very\n\
clever to ensure (far in advance) that each tape movement will be the\n\
most effective possible (that is, will best participate at\n\
\"progressing\" the merge). Some tapes were even able to read\n\
backwards, and this was also used to avoid the rewinding time.\n\
Believe me, real good tape sorts were quite spectacular to watch!\n\
From all times, sorting has always been a Great Art! :-)\n");
PyMODINIT_FUNC
init_heapq(void)
{
PyObject *m;
m = Py_InitModule3("_heapq", heapq_methods, module_doc);
if (m == NULL)
return;
PyModule_AddObject(m, "__about__", PyString_FromString(__about__));
}

View File

@ -0,0 +1,777 @@
/*
An implementation of the new I/O lib as defined by PEP 3116 - "New I/O"
Classes defined here: UnsupportedOperation, BlockingIOError.
Functions defined here: open().
Mostly written by Amaury Forgeot d'Arc
*/
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "structmember.h"
#include "_iomodule.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif /* HAVE_SYS_TYPES_H */
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif /* HAVE_SYS_STAT_H */
/* Shared objects used across the io module; all of them are assigned in
   init_io().  The _PyIO_str_* pointers receive interned strings (mostly
   method/attribute names, plus "\n" for _PyIO_str_nl).  The last three are
   not interned strings: _PyIO_empty_str is an empty unicode object,
   _PyIO_empty_bytes an empty bytes object and _PyIO_zero the integer 0. */
PyObject *_PyIO_str_close;
PyObject *_PyIO_str_closed;
PyObject *_PyIO_str_decode;
PyObject *_PyIO_str_encode;
PyObject *_PyIO_str_fileno;
PyObject *_PyIO_str_flush;
PyObject *_PyIO_str_getstate;
PyObject *_PyIO_str_isatty;
PyObject *_PyIO_str_newlines;
PyObject *_PyIO_str_nl;
PyObject *_PyIO_str_read;
PyObject *_PyIO_str_read1;
PyObject *_PyIO_str_readable;
PyObject *_PyIO_str_readinto;
PyObject *_PyIO_str_readline;
PyObject *_PyIO_str_reset;
PyObject *_PyIO_str_seek;
PyObject *_PyIO_str_seekable;
PyObject *_PyIO_str_setstate;
PyObject *_PyIO_str_tell;
PyObject *_PyIO_str_truncate;
PyObject *_PyIO_str_writable;
PyObject *_PyIO_str_write;
PyObject *_PyIO_empty_str;
PyObject *_PyIO_empty_bytes;
PyObject *_PyIO_zero;
/* Docstring of the _io module; handed to Py_InitModule4() in init_io().
   The text is user-visible at runtime, so it must be edited with care. */
PyDoc_STRVAR(module_doc,
"The io module provides the Python interfaces to stream handling. The\n"
"builtin open function is defined in this module.\n"
"\n"
"At the top of the I/O hierarchy is the abstract base class IOBase. It\n"
"defines the basic interface to a stream. Note, however, that there is no\n"
"separation between reading and writing to streams; implementations are\n"
"allowed to raise an IOError if they do not support a given operation.\n"
"\n"
"Extending IOBase is RawIOBase which deals simply with the reading and\n"
"writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide\n"
"an interface to OS files.\n"
"\n"
"BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its\n"
"subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer\n"
"streams that are readable, writable, and both respectively.\n"
"BufferedRandom provides a buffered interface to random access\n"
"streams. BytesIO is a simple stream of in-memory bytes.\n"
"\n"
"Another IOBase subclass, TextIOBase, deals with the encoding and decoding\n"
"of streams into text. TextIOWrapper, which extends it, is a buffered text\n"
"interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO\n"
"is a in-memory stream for text.\n"
"\n"
"Argument names are not part of the specification, and only the arguments\n"
"of open() are intended to be used as keyword arguments.\n"
"\n"
"data:\n"
"\n"
"DEFAULT_BUFFER_SIZE\n"
"\n"
" An int containing the default buffer size used by the module's buffered\n"
" I/O classes. open() uses the file's blksize (as obtained by os.stat) if\n"
" possible.\n"
);
/*
 * BlockingIOError extends IOError
 */

/* tp_init slot of BlockingIOError.
 *
 * Expects the positional arguments (errno, strerror[, characters_written]).
 * The first two are repacked into a 2-tuple and forwarded, together with
 * kwds, to IOError's own tp_init; the optional third value (default 0) is
 * stored in self->written once the base initialiser has succeeded.
 *
 * Returns 0 on success and -1 on failure.
 */
static int
blockingioerror_init(PyBlockingIOErrorObject *self, PyObject *args,
                     PyObject *kwds)
{
    PyObject *errno_obj = NULL;
    PyObject *message_obj = NULL;
    PyObject *parent_args;
    Py_ssize_t nwritten = 0;
    int status;

    assert(PyTuple_Check(args));

    /* Start from a known value so a failed init never leaves garbage. */
    self->written = 0;

    if (!PyArg_ParseTuple(args, "OO|n:BlockingIOError",
                          &errno_obj, &message_obj, &nwritten)) {
        return -1;
    }

    parent_args = PyTuple_Pack(2, errno_obj, message_obj);
    if (parent_args == NULL) {
        return -1;
    }

    /* The base class initialiser fills in the errno/strerror members. */
    status = ((PyTypeObject *)PyExc_IOError)->tp_init(
                 (PyObject *)self, parent_args, kwds);
    Py_DECREF(parent_args);
    if (status == -1) {
        return -1;
    }

    self->written = nwritten;
    return 0;
}
/* Member table of BlockingIOError: exposes the `written` field of
   PyBlockingIOErrorObject to Python under the name "characters_written"
   as a Py_ssize_t (T_PYSSIZET), with a flags value of 0. */
static PyMemberDef blockingioerror_members[] = {
{"characters_written", T_PYSSIZET, offsetof(PyBlockingIOErrorObject, written), 0},
{NULL} /* Sentinel */
};
/* Static type object for BlockingIOError.
   The initialiser is positional, so the order of the slots below matters.
   Only the name, instance size, flags, docstring, member table and tp_init
   are filled in here; tp_base is left 0 and is pointed at IOError by
   init_io() before the type is readied. */
static PyTypeObject _PyExc_BlockingIOError = {
PyVarObject_HEAD_INIT(NULL, 0)
"BlockingIOError", /*tp_name*/
sizeof(PyBlockingIOErrorObject), /*tp_basicsize*/
0, /*tp_itemsize*/
0, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare */
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
PyDoc_STR("Exception raised when I/O would block "
"on a non-blocking I/O stream"), /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
blockingioerror_members, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)blockingioerror_init, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
};
/* Handle to the type above as a generic object pointer; this is the symbol
   declared extern in _iomodule.h. */
PyObject *PyExc_BlockingIOError = (PyObject *)&_PyExc_BlockingIOError;
/*
 * The main open() function
 *
 * open_doc is the docstring attached to io.open() through the
 * module_methods table.  It is user-visible runtime text describing the
 * file, mode, buffering, encoding, errors, newline and closefd arguments
 * that io_open() parses.
 */
PyDoc_STRVAR(open_doc,
"Open file and return a stream. Raise IOError upon failure.\n"
"\n"
"file is either a text or byte string giving the name (and the path\n"
"if the file isn't in the current working directory) of the file to\n"
"be opened or an integer file descriptor of the file to be\n"
"wrapped. (If a file descriptor is given, it is closed when the\n"
"returned I/O object is closed, unless closefd is set to False.)\n"
"\n"
"mode is an optional string that specifies the mode in which the file\n"
"is opened. It defaults to 'r' which means open for reading in text\n"
"mode. Other common values are 'w' for writing (truncating the file if\n"
"it already exists), and 'a' for appending (which on some Unix systems,\n"
"means that all writes append to the end of the file regardless of the\n"
"current seek position). In text mode, if encoding is not specified the\n"
"encoding used is platform dependent. (For reading and writing raw\n"
"bytes use binary mode and leave encoding unspecified.) The available\n"
"modes are:\n"
"\n"
"========= ===============================================================\n"
"Character Meaning\n"
"--------- ---------------------------------------------------------------\n"
"'r' open for reading (default)\n"
"'w' open for writing, truncating the file first\n"
"'a' open for writing, appending to the end of the file if it exists\n"
"'b' binary mode\n"
"'t' text mode (default)\n"
"'+' open a disk file for updating (reading and writing)\n"
"'U' universal newline mode (for backwards compatibility; unneeded\n"
" for new code)\n"
"========= ===============================================================\n"
"\n"
"The default mode is 'rt' (open for reading text). For binary random\n"
"access, the mode 'w+b' opens and truncates the file to 0 bytes, while\n"
"'r+b' opens the file without truncation.\n"
"\n"
"Python distinguishes between files opened in binary and text modes,\n"
"even when the underlying operating system doesn't. Files opened in\n"
"binary mode (appending 'b' to the mode argument) return contents as\n"
"bytes objects without any decoding. In text mode (the default, or when\n"
"'t' is appended to the mode argument), the contents of the file are\n"
"returned as strings, the bytes having been first decoded using a\n"
"platform-dependent encoding or using the specified encoding if given.\n"
"\n"
"buffering is an optional integer used to set the buffering policy.\n"
"Pass 0 to switch buffering off (only allowed in binary mode), 1 to select\n"
"line buffering (only usable in text mode), and an integer > 1 to indicate\n"
"the size of a fixed-size chunk buffer. When no buffering argument is\n"
"given, the default buffering policy works as follows:\n"
"\n"
"* Binary files are buffered in fixed-size chunks; the size of the buffer\n"
" is chosen using a heuristic trying to determine the underlying device's\n"
" \"block size\" and falling back on `io.DEFAULT_BUFFER_SIZE`.\n"
" On many systems, the buffer will typically be 4096 or 8192 bytes long.\n"
"\n"
"* \"Interactive\" text files (files for which isatty() returns True)\n"
" use line buffering. Other text files use the policy described above\n"
" for binary files.\n"
"\n"
"encoding is the name of the encoding used to decode or encode the\n"
"file. This should only be used in text mode. The default encoding is\n"
"platform dependent, but any encoding supported by Python can be\n"
"passed. See the codecs module for the list of supported encodings.\n"
"\n"
"errors is an optional string that specifies how encoding errors are to\n"
"be handled---this argument should not be used in binary mode. Pass\n"
"'strict' to raise a ValueError exception if there is an encoding error\n"
"(the default of None has the same effect), or pass 'ignore' to ignore\n"
"errors. (Note that ignoring encoding errors can lead to data loss.)\n"
"See the documentation for codecs.register for a list of the permitted\n"
"encoding error strings.\n"
"\n"
"newline controls how universal newlines works (it only applies to text\n"
"mode). It can be None, '', '\\n', '\\r', and '\\r\\n'. It works as\n"
"follows:\n"
"\n"
"* On input, if newline is None, universal newlines mode is\n"
" enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
" these are translated into '\\n' before being returned to the\n"
" caller. If it is '', universal newline mode is enabled, but line\n"
" endings are returned to the caller untranslated. If it has any of\n"
" the other legal values, input lines are only terminated by the given\n"
" string, and the line ending is returned to the caller untranslated.\n"
"\n"
"* On output, if newline is None, any '\\n' characters written are\n"
" translated to the system default line separator, os.linesep. If\n"
" newline is '', no translation takes place. If newline is any of the\n"
" other legal values, any '\\n' characters written are translated to\n"
" the given string.\n"
"\n"
"If closefd is False, the underlying file descriptor will be kept open\n"
"when the file is closed. This does not work when a file name is given\n"
"and must be True in that case.\n"
"\n"
"open() returns a file object whose type depends on the mode, and\n"
"through which the standard file operations such as reading and writing\n"
"are performed. When open() is used to open a file in a text mode ('w',\n"
"'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open\n"
"a file in a binary mode, the returned class varies: in read binary\n"
"mode, it returns a BufferedReader; in write binary and append binary\n"
"modes, it returns a BufferedWriter, and in read/write mode, it returns\n"
"a BufferedRandom.\n"
"\n"
"It is also possible to use a string or bytearray as a file for both\n"
"reading and writing. For strings StringIO can be used like a file\n"
"opened in a text mode, and for bytes a BytesIO can be used like a file\n"
"opened in a binary mode.\n"
);
/* Implementation of io.open().
 *
 * Steps, in order:
 *   1. parse (file, mode, buffering, encoding, errors, newline, closefd);
 *   2. reject a `file` that is neither unicode, bytes nor a number;
 *   3. decode the mode string into flags, refusing unknown or repeated
 *      characters, and derive the raw mode passed to FileIO;
 *   4. validate flag/argument combinations;
 *   5. create the raw FileIO object, then (depending on buffering and mode)
 *      wrap it in a Buffered* object and, for text mode, a TextIOWrapper.
 *
 * `result` always tracks the outermost object built so far, so that the
 * error path can close it while keeping the original exception.
 *
 * Returns the stream object, or NULL on failure.
 */
static PyObject *
io_open(PyObject *self, PyObject *args, PyObject *kwds)
{
    char *kwlist[] = {"file", "mode", "buffering",
                      "encoding", "errors", "newline",
                      "closefd", NULL};
    PyObject *file;
    char *mode = "r";
    int buffering = -1, closefd = 1;
    char *encoding = NULL, *errors = NULL, *newline = NULL;
    unsigned idx;
    int reading = 0, writing = 0, appending = 0, updating = 0;
    int text = 0, binary = 0, universal = 0;
    char rawmode[5], *out;
    int line_buffering;
    long isatty;
    PyObject *raw, *modeobj = NULL, *buffer, *wrapper, *result = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|sizzzi:open", kwlist,
                                     &file, &mode, &buffering,
                                     &encoding, &errors, &newline,
                                     &closefd)) {
        return NULL;
    }

    if (!(PyUnicode_Check(file) ||
          PyBytes_Check(file) ||
          PyNumber_Check(file))) {
        PyObject *repr = PyObject_Repr(file);
        if (repr != NULL) {
            PyErr_Format(PyExc_TypeError, "invalid file: %s",
                         PyString_AS_STRING(repr));
            Py_DECREF(repr);
        }
        return NULL;
    }

    /* Decode mode: one flag per character. */
    for (idx = 0; mode[idx] != '\0'; idx++) {
        char flag = mode[idx];
        int known = 1;

        switch (flag) {
        case 'r':
            reading = 1;
            break;
        case 'w':
            writing = 1;
            break;
        case 'a':
            appending = 1;
            break;
        case '+':
            updating = 1;
            break;
        case 't':
            text = 1;
            break;
        case 'b':
            binary = 1;
            break;
        case 'U':
            universal = 1;
            reading = 1;
            break;
        default:
            known = 0;
            break;
        }

        /* Unknown characters and characters that occur again later in the
           string are both reported as an invalid mode. */
        if (!known || strchr(mode + idx + 1, flag)) {
            PyErr_Format(PyExc_ValueError, "invalid mode: '%s'", mode);
            return NULL;
        }
    }

    /* Mode string handed to FileIO: at most "rwa+" plus the terminator. */
    out = rawmode;
    if (reading)
        *out++ = 'r';
    if (writing)
        *out++ = 'w';
    if (appending)
        *out++ = 'a';
    if (updating)
        *out++ = '+';
    *out = '\0';

    /* Parameters validation */
    if (universal) {
        if (writing || appending) {
            PyErr_SetString(PyExc_ValueError,
                            "can't use U and writing mode at once");
            return NULL;
        }
        reading = 1;
    }

    if (text && binary) {
        PyErr_SetString(PyExc_ValueError,
                        "can't have text and binary mode at once");
        return NULL;
    }

    if (reading + writing + appending > 1) {
        PyErr_SetString(PyExc_ValueError,
                        "must have exactly one of read/write/append mode");
        return NULL;
    }

    if (binary && encoding != NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "binary mode doesn't take an encoding argument");
        return NULL;
    }

    if (binary && errors != NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "binary mode doesn't take an errors argument");
        return NULL;
    }

    if (binary && newline != NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "binary mode doesn't take a newline argument");
        return NULL;
    }

    /* Create the Raw file stream */
    raw = PyObject_CallFunction((PyObject *)&PyFileIO_Type,
                                "Osi", file, rawmode, closefd);
    if (raw == NULL)
        return NULL;
    result = raw;

    modeobj = PyUnicode_FromString(mode);
    if (modeobj == NULL)
        goto error;

    /* buffering: ask the raw stream whether it is a terminal */
    {
        PyObject *tty_obj = PyObject_CallMethod(raw, "isatty", NULL);
        if (tty_obj == NULL)
            goto error;
        isatty = PyLong_AsLong(tty_obj);
        Py_DECREF(tty_obj);
        if (isatty == -1 && PyErr_Occurred())
            goto error;
    }

    /* buffering == 1, or no explicit buffering on a terminal, means line
       buffering with a default-sized buffer. */
    line_buffering = (buffering == 1 || (buffering < 0 && isatty));
    if (line_buffering)
        buffering = -1;

    if (buffering < 0) {
        buffering = DEFAULT_BUFFER_SIZE;
#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
        {
            struct stat st;
            int fd;
            PyObject *fd_obj = PyObject_CallMethod(raw, "fileno", NULL);
            if (fd_obj == NULL)
                goto error;

            fd = _PyInt_AsInt(fd_obj);
            Py_DECREF(fd_obj);
            if (fd == -1 && PyErr_Occurred())
                goto error;

            /* Prefer the block size reported for the descriptor. */
            if (fstat(fd, &st) >= 0 && st.st_blksize > 1)
                buffering = st.st_blksize;
        }
#endif
    }
    if (buffering < 0) {
        PyErr_SetString(PyExc_ValueError,
                        "invalid buffering size");
        goto error;
    }

    /* if not buffering, returns the raw file object */
    if (buffering == 0) {
        if (!binary) {
            PyErr_SetString(PyExc_ValueError,
                            "can't have unbuffered text I/O");
            goto error;
        }

        Py_DECREF(modeobj);
        return result;
    }

    /* wraps into a buffered file */
    {
        PyObject *Buffered_class;

        if (updating) {
            Buffered_class = (PyObject *)&PyBufferedRandom_Type;
        }
        else if (writing || appending) {
            Buffered_class = (PyObject *)&PyBufferedWriter_Type;
        }
        else if (reading) {
            Buffered_class = (PyObject *)&PyBufferedReader_Type;
        }
        else {
            PyErr_Format(PyExc_ValueError,
                         "unknown mode: '%s'", mode);
            goto error;
        }

        buffer = PyObject_CallFunction(Buffered_class, "Oi", raw, buffering);
    }
    if (buffer == NULL)
        goto error;
    /* The buffered object is now the outermost one; our own reference to
       the raw stream is no longer needed. */
    result = buffer;
    Py_DECREF(raw);

    /* if binary, returns the buffered file */
    if (binary) {
        Py_DECREF(modeobj);
        return result;
    }

    /* wraps into a TextIOWrapper */
    wrapper = PyObject_CallFunction((PyObject *)&PyTextIOWrapper_Type,
                                    "Osssi",
                                    buffer,
                                    encoding, errors, newline,
                                    line_buffering);
    if (wrapper == NULL)
        goto error;
    result = wrapper;
    Py_DECREF(buffer);

    if (PyObject_SetAttrString(wrapper, "mode", modeobj) < 0)
        goto error;
    Py_DECREF(modeobj);
    return result;

  error:
    if (result != NULL) {
        PyObject *exc, *val, *tb, *close_result;

        /* Close the outermost object built so far without losing the
           exception that brought us here. */
        PyErr_Fetch(&exc, &val, &tb);
        close_result = PyObject_CallMethod(result, "close", NULL);
        _PyErr_ReplaceException(exc, val, tb);
        Py_XDECREF(close_result);
        Py_DECREF(result);
    }
    Py_XDECREF(modeobj);
    return NULL;
}
/*
 * Private helpers for the io module.
 */

/* Convert `item` to a Py_off_t.
 *
 * The object is first converted with PyNumber_Index().  An int result is
 * cast directly; anything else goes through PyLong_AsOff_t().  If that
 * conversion fails with an OverflowError, the error is cleared and then:
 *   - when `err` is NULL the result is clipped to PY_OFF_T_MIN or
 *     PY_OFF_T_MAX according to the sign of the value;
 *   - otherwise an exception of type `err` is raised instead.
 * Any other failure is left untouched.
 *
 * Returns the converted value; -1 is returned when PyNumber_Index() fails
 * and is also what a failed PyLong_AsOff_t() leaves in the result.
 */
Py_off_t
PyNumber_AsOff_t(PyObject *item, PyObject *err)
{
    Py_off_t offset;
    PyObject *pending;
    PyObject *index = PyNumber_Index(item);

    if (index == NULL)
        return -1;

    if (PyInt_Check(index)) {
        /* We assume a long always fits in a Py_off_t... */
        offset = (Py_off_t) PyInt_AS_LONG(index);
    }
    else {
        offset = PyLong_AsOff_t(index);

        /* Only an OverflowError from the conversion gets special
           treatment; every other outcome is returned as it is. */
        if (offset == -1 &&
            (pending = PyErr_Occurred()) != NULL &&
            PyErr_GivenExceptionMatches(pending, PyExc_OverflowError)) {
            PyErr_Clear();
            if (err == NULL) {
                /* No error requested: clip according to the sign. */
                assert(PyLong_Check(index));
                if (_PyLong_Sign(index) < 0)
                    offset = PY_OFF_T_MIN;
                else
                    offset = PY_OFF_T_MAX;
            }
            else {
                /* Replace the error with the caller's exception type. */
                PyErr_Format(err,
                             "cannot fit '%.200s' into an offset-sized integer",
                             item->ob_type->tp_name);
            }
        }
    }

    Py_DECREF(index);
    return offset;
}
/* Argument converter: like the "n" format code, except that None is
 * accepted and converted to -1.
 *
 * `result` must point to a Py_ssize_t, which is written only on success.
 * Returns 1 on success and 0 on failure: either the numeric conversion
 * raised, or `obj` is neither None nor a number (TypeError).
 */
int
_PyIO_ConvertSsize_t(PyObject *obj, void *result)
{
    Py_ssize_t value;

    if (obj == Py_None) {
        value = -1;
    }
    else if (!PyNumber_Check(obj)) {
        PyErr_Format(PyExc_TypeError,
                     "integer argument expected, got '%.200s'",
                     Py_TYPE(obj)->tp_name);
        return 0;
    }
    else {
        value = PyNumber_AsSsize_t(obj, PyExc_OverflowError);
        /* -1 is also a legitimate value, so check the error indicator. */
        if (value == -1 && PyErr_Occurred())
            return 0;
    }

    *((Py_ssize_t *)result) = value;
    return 1;
}
/*
 * Module definition
 */
/* Module-level state shared with the other io source files.
   _PyIO_os_module is set by init_io() to the imported "os" module and
   _PyIO_unsupported_operation to the UnsupportedOperation class created
   there.  _PyIO_locale_module is not assigned anywhere in this file. */
PyObject *_PyIO_os_module = NULL;
PyObject *_PyIO_locale_module = NULL;
PyObject *_PyIO_unsupported_operation = NULL;
/* Functions exported by the module: only open(), which accepts both
   positional and keyword arguments. */
static PyMethodDef module_methods[] = {
{"open", (PyCFunction)io_open, METH_VARARGS|METH_KEYWORDS, open_doc},
{NULL, NULL}
};
/* Initialisation function of the _io extension module (Python 2 style).
 *
 * Creates the module, imports "os" into _PyIO_os_module, publishes
 * DEFAULT_BUFFER_SIZE, creates the UnsupportedOperation class, readies and
 * registers every type of the io hierarchy (setting tp_base where the
 * static initialisers could not), and finally fills the cached interned
 * strings and constants.
 *
 * The function returns nothing.  On failure it releases the references
 * held in _PyIO_os_module and _PyIO_unsupported_operation and returns with
 * the failing call's exception still set.
 */
PyMODINIT_FUNC
init_io(void)
{
    /* Py_InitModule4() returns a *borrowed* reference: the module must never
       be DECREF'ed here, not even on the failure path. */
    PyObject *m = Py_InitModule4("_io", module_methods,
                                 module_doc, NULL, PYTHON_API_VERSION);
    if (m == NULL)
        return;

    /* put os in the module state */
    _PyIO_os_module = PyImport_ImportModule("os");
    if (_PyIO_os_module == NULL)
        goto fail;

/* Ready `type` and add it to the module under `name`.  PyModule_AddObject()
   only takes over the reference when it succeeds, hence the INCREF before
   and the DECREF on failure.  Wrapped in do/while so that the macro behaves
   as a single statement. */
#define ADD_TYPE(type, name) \
    do { \
        if (PyType_Ready(type) < 0) \
            goto fail; \
        Py_INCREF(type); \
        if (PyModule_AddObject(m, name, (PyObject *)type) < 0) { \
            Py_DECREF(type); \
            goto fail; \
        } \
    } while (0)

    /* DEFAULT_BUFFER_SIZE */
    if (PyModule_AddIntMacro(m, DEFAULT_BUFFER_SIZE) < 0)
        goto fail;

    /* UnsupportedOperation inherits from ValueError and IOError */
    _PyIO_unsupported_operation = PyObject_CallFunction(
        (PyObject *)&PyType_Type, "s(OO){}",
        "UnsupportedOperation", PyExc_ValueError, PyExc_IOError);
    if (_PyIO_unsupported_operation == NULL)
        goto fail;
    /* One reference stays in the global, the other goes to the module. */
    Py_INCREF(_PyIO_unsupported_operation);
    if (PyModule_AddObject(m, "UnsupportedOperation",
                           _PyIO_unsupported_operation) < 0) {
        /* The module did not take the extra reference: drop it here.  The
           global's own reference is released under `fail`. */
        Py_DECREF(_PyIO_unsupported_operation);
        goto fail;
    }

    /* BlockingIOError */
    _PyExc_BlockingIOError.tp_base = (PyTypeObject *) PyExc_IOError;
    ADD_TYPE(&_PyExc_BlockingIOError, "BlockingIOError");

    /* Concrete base types of the IO ABCs.
       (the ABCs themselves are declared through inheritance in io.py)
    */
    ADD_TYPE(&PyIOBase_Type, "_IOBase");
    ADD_TYPE(&PyRawIOBase_Type, "_RawIOBase");
    ADD_TYPE(&PyBufferedIOBase_Type, "_BufferedIOBase");
    ADD_TYPE(&PyTextIOBase_Type, "_TextIOBase");

    /* Implementation of concrete IO objects. */
    /* FileIO */
    PyFileIO_Type.tp_base = &PyRawIOBase_Type;
    ADD_TYPE(&PyFileIO_Type, "FileIO");

    /* BytesIO */
    PyBytesIO_Type.tp_base = &PyBufferedIOBase_Type;
    ADD_TYPE(&PyBytesIO_Type, "BytesIO");

    /* StringIO */
    PyStringIO_Type.tp_base = &PyTextIOBase_Type;
    ADD_TYPE(&PyStringIO_Type, "StringIO");

    /* BufferedReader */
    PyBufferedReader_Type.tp_base = &PyBufferedIOBase_Type;
    ADD_TYPE(&PyBufferedReader_Type, "BufferedReader");

    /* BufferedWriter */
    PyBufferedWriter_Type.tp_base = &PyBufferedIOBase_Type;
    ADD_TYPE(&PyBufferedWriter_Type, "BufferedWriter");

    /* BufferedRWPair */
    PyBufferedRWPair_Type.tp_base = &PyBufferedIOBase_Type;
    ADD_TYPE(&PyBufferedRWPair_Type, "BufferedRWPair");

    /* BufferedRandom */
    PyBufferedRandom_Type.tp_base = &PyBufferedIOBase_Type;
    ADD_TYPE(&PyBufferedRandom_Type, "BufferedRandom");

    /* TextIOWrapper */
    PyTextIOWrapper_Type.tp_base = &PyTextIOBase_Type;
    ADD_TYPE(&PyTextIOWrapper_Type, "TextIOWrapper");

    /* IncrementalNewlineDecoder */
    ADD_TYPE(&PyIncrementalNewlineDecoder_Type, "IncrementalNewlineDecoder");

    /* Interned strings */
    if (!(_PyIO_str_close = PyString_InternFromString("close")))
        goto fail;
    if (!(_PyIO_str_closed = PyString_InternFromString("closed")))
        goto fail;
    if (!(_PyIO_str_decode = PyString_InternFromString("decode")))
        goto fail;
    if (!(_PyIO_str_encode = PyString_InternFromString("encode")))
        goto fail;
    if (!(_PyIO_str_fileno = PyString_InternFromString("fileno")))
        goto fail;
    if (!(_PyIO_str_flush = PyString_InternFromString("flush")))
        goto fail;
    if (!(_PyIO_str_getstate = PyString_InternFromString("getstate")))
        goto fail;
    if (!(_PyIO_str_isatty = PyString_InternFromString("isatty")))
        goto fail;
    if (!(_PyIO_str_newlines = PyString_InternFromString("newlines")))
        goto fail;
    if (!(_PyIO_str_nl = PyString_InternFromString("\n")))
        goto fail;
    if (!(_PyIO_str_read = PyString_InternFromString("read")))
        goto fail;
    if (!(_PyIO_str_read1 = PyString_InternFromString("read1")))
        goto fail;
    if (!(_PyIO_str_readable = PyString_InternFromString("readable")))
        goto fail;
    if (!(_PyIO_str_readinto = PyString_InternFromString("readinto")))
        goto fail;
    if (!(_PyIO_str_readline = PyString_InternFromString("readline")))
        goto fail;
    if (!(_PyIO_str_reset = PyString_InternFromString("reset")))
        goto fail;
    if (!(_PyIO_str_seek = PyString_InternFromString("seek")))
        goto fail;
    if (!(_PyIO_str_seekable = PyString_InternFromString("seekable")))
        goto fail;
    if (!(_PyIO_str_setstate = PyString_InternFromString("setstate")))
        goto fail;
    if (!(_PyIO_str_tell = PyString_InternFromString("tell")))
        goto fail;
    if (!(_PyIO_str_truncate = PyString_InternFromString("truncate")))
        goto fail;
    if (!(_PyIO_str_write = PyString_InternFromString("write")))
        goto fail;
    if (!(_PyIO_str_writable = PyString_InternFromString("writable")))
        goto fail;

    /* Cached empty unicode, empty bytes and integer zero. */
    if (!(_PyIO_empty_str = PyUnicode_FromStringAndSize(NULL, 0)))
        goto fail;
    if (!(_PyIO_empty_bytes = PyBytes_FromStringAndSize(NULL, 0)))
        goto fail;
    if (!(_PyIO_zero = PyLong_FromLong(0L)))
        goto fail;

    return;

  fail:
    /* Release only what this function owns.  `m` is a borrowed reference
       and is deliberately left alone. */
    Py_CLEAR(_PyIO_os_module);
    Py_CLEAR(_PyIO_unsupported_operation);
}

View File

@ -0,0 +1,170 @@
/*
* Declarations shared between the different parts of the io module
*/
/* ABCs */
extern PyTypeObject PyIOBase_Type;
extern PyTypeObject PyRawIOBase_Type;
extern PyTypeObject PyBufferedIOBase_Type;
extern PyTypeObject PyTextIOBase_Type;
/* Concrete classes */
extern PyTypeObject PyFileIO_Type;
extern PyTypeObject PyBytesIO_Type;
extern PyTypeObject PyStringIO_Type;
extern PyTypeObject PyBufferedReader_Type;
extern PyTypeObject PyBufferedWriter_Type;
extern PyTypeObject PyBufferedRWPair_Type;
extern PyTypeObject PyBufferedRandom_Type;
extern PyTypeObject PyTextIOWrapper_Type;
extern PyTypeObject PyIncrementalNewlineDecoder_Type;
extern int _PyIO_ConvertSsize_t(PyObject *, void *);
/* These functions are used as METH_NOARGS methods, are normally called
* with args=NULL, and return a new reference.
* BUT when args=Py_True is passed, they return a borrowed reference.
*/
extern PyObject* _PyIOBase_check_readable(PyObject *self, PyObject *args);
extern PyObject* _PyIOBase_check_writable(PyObject *self, PyObject *args);
extern PyObject* _PyIOBase_check_seekable(PyObject *self, PyObject *args);
extern PyObject* _PyIOBase_check_closed(PyObject *self, PyObject *args);
/* Helper for finalization.
This function will revive an object ready to be deallocated and try to
close() it. It returns 0 if the object can be destroyed, or -1 if it
is alive again. */
extern int _PyIOBase_finalize(PyObject *self);
/* Returns true if the given FileIO object is closed.
Doesn't check the argument type, so be careful! */
extern int _PyFileIO_closed(PyObject *self);
/* Shortcut to the core of the IncrementalNewlineDecoder.decode method */
extern PyObject *_PyIncrementalNewlineDecoder_decode(
PyObject *self, PyObject *input, int final);
/* Finds the first line ending between `start` and `end`.
If found, returns the index after the line ending and doesn't touch
`*consumed`.
If not found, returns -1 and sets `*consumed` to the number of characters
which can be safely put aside until another search.
NOTE: for performance reasons, `end` must point to a NUL character ('\0').
Otherwise, the function will scan further and return garbage. */
extern Py_ssize_t _PyIO_find_line_ending(
int translated, int universal, PyObject *readnl,
Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed);
/* Return 1 if an EnvironmentError with errno == EINTR is set (and then
clears the error indicator), 0 otherwise.
Should only be called when PyErr_Occurred() is true.
*/
extern int _PyIO_trap_eintr(void);
/* Default buffer size (8 KiB).  io.open() falls back to it when no explicit
   buffering is requested, and init_io() exports it to Python under the same
   name. */
#define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */
/* Instance layout of BlockingIOError: the fields of an IOError instance
   followed by one extra member, `written`. */
typedef struct {
/* This is the equivalent of PyException_HEAD in 3.x */
PyObject_HEAD
PyObject *dict;
PyObject *args;
PyObject *message;
PyObject *myerrno;
PyObject *strerror;
PyObject *filename; /* Not used, but part of the IOError object */
/* Stored by blockingioerror_init() from the optional third constructor
   argument and exposed to Python as "characters_written". */
Py_ssize_t written;
} PyBlockingIOErrorObject;
/* The BlockingIOError type, as a generic object pointer (defined in
   _iomodule.c). */
extern PyObject *PyExc_BlockingIOError;
/*
 * Offset type for positioning.
 *
 * Every branch below defines the same set of names for the platform's
 * offset type:
 *   Py_off_t          the offset type itself
 *   PyLong_AsOff_t    Python integer -> Py_off_t conversion
 *   PyLong_FromOff_t  Py_off_t -> Python integer conversion
 *   PY_OFF_T_MAX/MIN  limits of the type
 *   PY_OFF_T_COMPAT   integer type to cast to when formatting
 *   PY_PRIdOFF        printf-style conversion matching PY_OFF_T_COMPAT
 */
/* Printing a variable of type off_t (with e.g., PyString_FromFormat)
correctly and without producing compiler warnings is surprisingly painful.
We identify an integer type whose size matches off_t and then: (1) cast the
off_t to that integer type and (2) use the appropriate conversion
specification. The cast is necessary: gcc complains about formatting a
long with "%lld" even when both long and long long have the same
precision. */
#if defined(MS_WIN64) || defined(MS_WINDOWS)
/* Windows uses long long for offsets */
typedef PY_LONG_LONG Py_off_t;
# define PyLong_AsOff_t PyLong_AsLongLong
# define PyLong_FromOff_t PyLong_FromLongLong
# define PY_OFF_T_MAX PY_LLONG_MAX
# define PY_OFF_T_MIN PY_LLONG_MIN
# define PY_OFF_T_COMPAT PY_LONG_LONG /* type compatible with off_t */
# define PY_PRIdOFF "lld" /* format to use for that type */
#else
/* Other platforms use off_t */
typedef off_t Py_off_t;
/* Pick the helper family by comparing sizeof(off_t) with size_t, then
   long long, then long; anything else is a build error. */
#if (SIZEOF_OFF_T == SIZEOF_SIZE_T)
# define PyLong_AsOff_t PyLong_AsSsize_t
# define PyLong_FromOff_t PyLong_FromSsize_t
# define PY_OFF_T_MAX PY_SSIZE_T_MAX
# define PY_OFF_T_MIN PY_SSIZE_T_MIN
# define PY_OFF_T_COMPAT Py_ssize_t
# define PY_PRIdOFF "zd"
#elif (HAVE_LONG_LONG && SIZEOF_OFF_T == SIZEOF_LONG_LONG)
# define PyLong_AsOff_t PyLong_AsLongLong
# define PyLong_FromOff_t PyLong_FromLongLong
# define PY_OFF_T_MAX PY_LLONG_MAX
# define PY_OFF_T_MIN PY_LLONG_MIN
# define PY_OFF_T_COMPAT PY_LONG_LONG
# define PY_PRIdOFF "lld"
#elif (SIZEOF_OFF_T == SIZEOF_LONG)
# define PyLong_AsOff_t PyLong_AsLong
# define PyLong_FromOff_t PyLong_FromLong
# define PY_OFF_T_MAX LONG_MAX
# define PY_OFF_T_MIN LONG_MIN
# define PY_OFF_T_COMPAT long
# define PY_PRIdOFF "ld"
#else
# error off_t does not match either size_t, long, or long long!
#endif
#endif
/* Converts a Python object to Py_off_t.  When `err` is NULL an overflowing
   value is clipped to PY_OFF_T_MIN/PY_OFF_T_MAX; otherwise an exception of
   type `err` is raised on overflow. */
extern Py_off_t PyNumber_AsOff_t(PyObject *item, PyObject *err);
/* Implementation details */
extern PyObject *_PyIO_os_module;
extern PyObject *_PyIO_locale_module;
extern PyObject *_PyIO_unsupported_operation;
extern PyObject *_PyIO_str_close;
extern PyObject *_PyIO_str_closed;
extern PyObject *_PyIO_str_decode;
extern PyObject *_PyIO_str_encode;
extern PyObject *_PyIO_str_fileno;
extern PyObject *_PyIO_str_flush;
extern PyObject *_PyIO_str_getstate;
extern PyObject *_PyIO_str_isatty;
extern PyObject *_PyIO_str_newlines;
extern PyObject *_PyIO_str_nl;
extern PyObject *_PyIO_str_read;
extern PyObject *_PyIO_str_read1;
extern PyObject *_PyIO_str_readable;
extern PyObject *_PyIO_str_readinto;
extern PyObject *_PyIO_str_readline;
extern PyObject *_PyIO_str_reset;
extern PyObject *_PyIO_str_seek;
extern PyObject *_PyIO_str_seekable;
extern PyObject *_PyIO_str_setstate;
extern PyObject *_PyIO_str_tell;
extern PyObject *_PyIO_str_truncate;
extern PyObject *_PyIO_str_writable;
extern PyObject *_PyIO_str_write;
extern PyObject *_PyIO_empty_str;
extern PyObject *_PyIO_empty_bytes;
extern PyObject *_PyIO_zero;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,909 @@
#include "Python.h"
#include "structmember.h" /* for offsetof() */
#include "_iomodule.h"
/* Instance layout of a BytesIO object. */
typedef struct {
PyObject_HEAD
/* Data buffer; a NULL pointer marks the object as closed (this is what
   CHECK_CLOSED and the `closed` getter test). */
char *buf;
/* Current stream position, as reported by tell(); write_bytes() pads with
   zero bytes when it lies beyond string_size. */
Py_ssize_t pos;
/* Number of valid bytes stored in buf, i.e. the length returned by
   getvalue(). */
Py_ssize_t string_size;
/* Allocated size of buf in bytes; maintained by resize_buffer(). */
size_t buf_size;
/* NOTE(review): presumably the instance dictionary and the weak reference
   list; neither is used in the part of the file visible here - confirm. */
PyObject *dict;
PyObject *weakreflist;
} bytesio;
/* Guard for operations that need an open object: when self->buf is NULL,
   raise ValueError and return NULL from the *calling* function.  Because of
   that embedded `return NULL`, the macro can only be used inside functions
   returning a pointer.  It expands to a bare `if` block (no do/while
   wrapper), so use it as a statement of its own. */
#define CHECK_CLOSED(self) \
if ((self)->buf == NULL) { \
PyErr_SetString(PyExc_ValueError, \
"I/O operation on closed file."); \
return NULL; \
}
/* Internal helper that consumes one line from a BytesIO buffer.
 *
 * Starting at the current position, scans forward to the next '\n' (which
 * is included in the line) or to the end of the stored data, whichever
 * comes first.  On return *output points at the first byte of the line
 * inside the internal buffer (nothing is copied) and self->pos has been
 * advanced past the line.
 *
 * Returns the length of the line in bytes; 0 when there is nothing left.
 * The buffer must not be closed (self->buf != NULL).
 */
static Py_ssize_t
get_line(bytesio *self, char **output)
{
    char *line_start;
    char *cursor;
    const char *data_end;
    Py_ssize_t len;

    assert(self->buf != NULL);

    line_start = self->buf + self->pos;
    data_end = self->buf + self->string_size;

    /* Walk up to the newline or the end of the data. */
    cursor = line_start;
    while (cursor < data_end && *cursor != '\n') {
        cursor++;
    }

    /* The newline itself belongs to the line. */
    if (cursor < data_end) {
        cursor++;
    }

    len = cursor - line_start;
    *output = line_start;

    assert(len >= 0);
    assert(self->pos < PY_SSIZE_T_MAX - len);
    self->pos += len;

    return len;
}
/* Internal helper that makes sure the buffer of a BytesIO object can hold
 * `size` bytes, growing or shrinking the allocation as needed.
 *
 * Allocation policy, relative to the current capacity:
 *   - size below half the capacity: shrink to exactly size + 1;
 *   - size below the capacity: nothing to do;
 *   - size up to 1.125 times the capacity: grow with some headroom,
 *     similar to list_resize();
 *   - anything larger: grow to exactly size + 1.
 *
 * The caller must pass a non-negative size.  Returns 0 on success and -1
 * with OverflowError or MemoryError set otherwise; on failure the existing
 * buffer is left untouched.
 */
static int
resize_buffer(bytesio *self, size_t size)
{
    /* Unsigned arithmetic throughout: signed overflow is undefined in C. */
    size_t capacity = self->buf_size;
    size_t wanted;
    char *grown = NULL;

    assert(self->buf != NULL);

    /* Stay within the signed range; Python strings cannot be longer. */
    if (size > PY_SSIZE_T_MAX)
        goto overflow;

    /* Fits, and not wastefully so: keep the current allocation. */
    if (size >= capacity / 2 && size < capacity)
        return 0;

    if (size >= capacity && size <= capacity * 1.125) {
        /* Moderate upsize; overallocate similar to list_resize() */
        wanted = size + (size >> 3) + (size < 9 ? 3 : 6);
    }
    else {
        /* Major downsize or major upsize: use the exact size. */
        wanted = size + 1;
    }

    if (wanted > ((size_t)-1) / sizeof(char))
        goto overflow;

    grown = (char *)PyMem_Realloc(self->buf, wanted * sizeof(char));
    if (grown == NULL) {
        PyErr_NoMemory();
        return -1;
    }

    self->buf_size = wanted;
    self->buf = grown;
    return 0;

  overflow:
    PyErr_SetString(PyExc_OverflowError,
                    "new buffer size too large");
    return -1;
}
/* Internal helper that writes `len` bytes at the current position of a
 * BytesIO object.
 *
 * The buffer is enlarged first when the write would run past its allocated
 * size.  If the position lies beyond the end of the stored data (after a
 * seek past the end), the gap is filled with null bytes.  Existing data is
 * overwritten when the position is inside it.  The position, and the data
 * length if it grew, are updated afterwards.
 *
 * Returns the number of bytes written (always len), or -1 if the buffer
 * could not be resized.
 */
static Py_ssize_t
write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
{
    size_t end_offset;

    assert(self->buf != NULL);
    assert(self->pos >= 0);
    assert(len >= 0);

    /* Offset one past the last byte this write will touch. */
    end_offset = (size_t)self->pos + len;
    if (end_offset > self->buf_size &&
        resize_buffer(self, end_offset) < 0) {
        return -1;
    }

    if (self->pos > self->string_size) {
        /* Overseek: zero the region between the end of the data and the
           position where the write starts.

           0        string_size          pos
           |<--used-->|<----to pad----->|<---to write--->|
        */
        memset(self->buf + self->string_size, '\0',
               (self->pos - self->string_size) * sizeof(char));
    }

    /* Copy the payload, possibly over existing data. */
    memcpy(self->buf + self->pos, bytes, len);
    self->pos += len;

    /* The data only ever grows here. */
    if (self->string_size < self->pos) {
        self->string_size = self->pos;
    }

    return len;
}
/* Getter for the closed state: True once the buffer pointer is NULL,
   False while the object still owns a buffer. */
static PyObject *
bytesio_get_closed(bytesio *self)
{
    if (self->buf != NULL) {
        Py_RETURN_FALSE;
    }
    Py_RETURN_TRUE;
}
PyDoc_STRVAR(readable_doc,
"readable() -> bool. Returns True if the IO object can be read.");
PyDoc_STRVAR(writable_doc,
"writable() -> bool. Returns True if the IO object can be written.");
PyDoc_STRVAR(seekable_doc,
"seekable() -> bool. Returns True if the IO object can be seeked.");
/* Generic getter for the writable, readable and seekable properties */
/* Raises ValueError (through CHECK_CLOSED) when the object is closed and
   returns True otherwise: an open BytesIO supports all three operations. */
static PyObject *
return_not_closed(bytesio *self)
{
CHECK_CLOSED(self);
Py_RETURN_TRUE;
}
PyDoc_STRVAR(flush_doc,
"flush() -> None. Does nothing.");
/* flush(): there is nothing to flush for an in-memory buffer, so this only
   checks that the object is still open (ValueError otherwise) and returns
   None. */
static PyObject *
bytesio_flush(bytesio *self)
{
CHECK_CLOSED(self);
Py_RETURN_NONE;
}
PyDoc_STRVAR(getval_doc,
"getvalue() -> bytes.\n"
"\n"
"Retrieve the entire contents of the BytesIO object.");

/* getvalue(): return a new bytes object holding the first `string_size`
   bytes of the internal buffer, regardless of the current position. */
static PyObject *
bytesio_getvalue(bytesio *self)
{
    PyObject *contents;

    CHECK_CLOSED(self);
    contents = PyBytes_FromStringAndSize(self->buf, self->string_size);
    return contents;
}
PyDoc_STRVAR(isatty_doc,
"isatty() -> False.\n"
"\n"
"Always returns False since BytesIO objects are not connected\n"
"to a tty-like device.");

/* isatty(): always False once the CHECK_CLOSED guard has passed. */
static PyObject *
bytesio_isatty(bytesio *self)
{
    CHECK_CLOSED(self);
    Py_INCREF(Py_False);
    return Py_False;
}
PyDoc_STRVAR(tell_doc,
"tell() -> current file position, an integer\n");

/* tell(): report the current stream position (self->pos) as an integer. */
static PyObject *
bytesio_tell(bytesio *self)
{
    PyObject *position;

    CHECK_CLOSED(self);
    position = PyLong_FromSsize_t(self->pos);
    return position;
}
PyDoc_STRVAR(read_doc,
"read([size]) -> read at most size bytes, returned as a string.\n"
"\n"
"If the size argument is negative, read until EOF is reached.\n"
"Return an empty string at EOF.");

/* read([size]): return up to `size` bytes starting at the current position
   and advance the position by the number of bytes returned.  A missing,
   None or negative size means "everything up to the end of the data". */
static PyObject *
bytesio_read(bytesio *self, PyObject *args)
{
    PyObject *arg = Py_None;
    Py_ssize_t size, available;
    char *start;

    CHECK_CLOSED(self);

    if (!PyArg_ParseTuple(args, "|O:read", &arg))
        return NULL;

    if (arg == Py_None) {
        /* Read until EOF is reached, by default. */
        size = -1;
    }
    else if (PyNumber_Check(arg)) {
        size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
        if (size == -1 && PyErr_Occurred())
            return NULL;
    }
    else {
        PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
                     Py_TYPE(arg)->tp_name);
        return NULL;
    }

    /* Clamp the request to what is left; `available` is negative when the
       position has been moved past the end of the data. */
    available = self->string_size - self->pos;
    if (size < 0 || size > available)
        size = (available > 0) ? available : 0;

    assert(self->buf != NULL);
    start = self->buf + self->pos;
    self->pos += size;

    return PyBytes_FromStringAndSize(start, size);
}
PyDoc_STRVAR(read1_doc,
"read1(size) -> read at most size bytes, returned as a string.\n"
"\n"
"If the size argument is negative or omitted, read until EOF is reached.\n"
"Return an empty string at EOF.");

/* read1(size): forwards to bytesio_read() by wrapping the single argument
   in a temporary 1-tuple. */
static PyObject *
bytesio_read1(bytesio *self, PyObject *n)
{
    PyObject *result;
    PyObject *packed = PyTuple_Pack(1, n);

    if (packed == NULL)
        return NULL;

    result = bytesio_read(self, packed);
    Py_DECREF(packed);
    return result;
}
PyDoc_STRVAR(readline_doc,
"readline([size]) -> next line from the file, as a string.\n"
"\n"
"Retain newline. A non-negative size argument limits the maximum\n"
"number of bytes to return (an incomplete line may be returned then).\n"
"Return an empty string at EOF.\n");

/* readline([size]): fetch the next line through get_line() and, when a
   non-negative size smaller than the line was given, hand back only the
   first `size` bytes and move the position back over the remainder. */
static PyObject *
bytesio_readline(bytesio *self, PyObject *args)
{
    PyObject *arg = Py_None;
    Py_ssize_t size, linelen;
    char *start;

    CHECK_CLOSED(self);

    if (!PyArg_ParseTuple(args, "|O:readline", &arg))
        return NULL;

    if (arg == Py_None) {
        /* No size limit, by default. */
        size = -1;
    }
    else if (PyNumber_Check(arg)) {
        size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
        if (size == -1 && PyErr_Occurred())
            return NULL;
    }
    else {
        PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
                     Py_TYPE(arg)->tp_name);
        return NULL;
    }

    linelen = get_line(self, &start);

    if (size >= 0 && size < linelen) {
        /* Give back the bytes beyond the caller's limit. */
        Py_ssize_t excess = linelen - size;
        linelen -= excess;
        self->pos -= excess;
    }

    return PyBytes_FromStringAndSize(start, linelen);
}
PyDoc_STRVAR(readlines_doc,
"readlines([size]) -> list of strings, each a line from the file.\n"
"\n"
"Call readline() repeatedly and return a list of the lines so read.\n"
"The optional size argument, if given, is an approximate bound on the\n"
"total number of bytes in the lines returned.\n");

/* readlines([size]): collect lines from get_line() into a new list until
   get_line() reports a zero-length line or, for a positive size, until the
   accumulated byte count reaches that size. */
static PyObject *
bytesio_readlines(bytesio *self, PyObject *args)
{
    PyObject *arg = Py_None;
    PyObject *lines, *chunk;
    Py_ssize_t limit, total, linelen;
    char *start;

    CHECK_CLOSED(self);

    if (!PyArg_ParseTuple(args, "|O:readlines", &arg))
        return NULL;

    if (arg == Py_None) {
        /* No size limit, by default. */
        limit = -1;
    }
    else if (PyNumber_Check(arg)) {
        limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
        if (limit == -1 && PyErr_Occurred())
            return NULL;
    }
    else {
        PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
                     Py_TYPE(arg)->tp_name);
        return NULL;
    }

    total = 0;
    lines = PyList_New(0);
    if (lines == NULL)
        return NULL;

    for (;;) {
        int status;

        linelen = get_line(self, &start);
        if (linelen == 0)
            break;

        chunk = PyBytes_FromStringAndSize(start, linelen);
        if (chunk == NULL)
            goto on_error;

        /* The list takes its own reference; ours is dropped either way. */
        status = PyList_Append(lines, chunk);
        Py_DECREF(chunk);
        if (status == -1)
            goto on_error;

        total += linelen;
        if (limit > 0 && total >= limit)
            break;
    }
    return lines;

on_error:
    Py_DECREF(lines);
    return NULL;
}
PyDoc_STRVAR(readinto_doc,
"readinto(bytearray) -> int. Read up to len(b) bytes into b.\n"
"\n"
"Returns number of bytes read (0 for EOF), or None if the object\n"
"is set not to block as has no data to read.");

/* readinto(b): copy at most len(b) bytes from the current position into
   the writable buffer `b`, advance the position, and return the number of
   bytes copied. */
static PyObject *
bytesio_readinto(bytesio *self, PyObject *args)
{
    Py_buffer target;
    Py_ssize_t count, remaining;

    CHECK_CLOSED(self);

    if (!PyArg_ParseTuple(args, "w*", &target))
        return NULL;

    /* Never copy more than what is left; `remaining` is negative when the
       position lies past the end of the data. */
    count = target.len;
    remaining = self->string_size - self->pos;
    if (count > remaining)
        count = (remaining < 0) ? 0 : remaining;

    memcpy(target.buf, self->buf + self->pos, count);
    assert(self->pos + count < PY_SSIZE_T_MAX);
    assert(count >= 0);
    self->pos += count;

    PyBuffer_Release(&target);
    return PyLong_FromSsize_t(count);
}
PyDoc_STRVAR(truncate_doc,
"truncate([size]) -> int. Truncate the file to at most size bytes.\n"
"\n"
"Size defaults to the current file position, as returned by tell().\n"
"The current file position is unchanged. Returns the new size.\n");

/* truncate([size]): shrink the stored data to `size` bytes when it is
   currently longer; a size at or beyond the current length changes nothing.
   Returns `size` as an integer.  Raises ValueError for a negative size and
   TypeError for a non-numeric, non-None argument. */
static PyObject *
bytesio_truncate(bytesio *self, PyObject *args)
{
    PyObject *arg = Py_None;
    Py_ssize_t size;

    CHECK_CLOSED(self);

    if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
        return NULL;

    if (arg == Py_None) {
        /* Truncate to current position if no argument is passed. */
        size = self->pos;
    }
    else if (PyNumber_Check(arg)) {
        size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
        if (size == -1 && PyErr_Occurred())
            return NULL;
    }
    else {
        PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
                     Py_TYPE(arg)->tp_name);
        return NULL;
    }

    if (size < 0) {
        PyErr_Format(PyExc_ValueError,
                     "negative size value %zd", size);
        return NULL;
    }

    if (self->string_size > size) {
        self->string_size = size;
        if (resize_buffer(self, size) < 0)
            return NULL;
    }

    return PyLong_FromSsize_t(size);
}
/* tp_iternext: return the next line as a bytes object.  Returning NULL
   without setting an exception (no line pointer, or a zero-length line)
   ends the iteration. */
static PyObject *
bytesio_iternext(bytesio *self)
{
    char *line_start;
    Py_ssize_t line_len;

    CHECK_CLOSED(self);

    line_len = get_line(self, &line_start);
    if (line_start == NULL || line_len == 0)
        return NULL;

    return PyBytes_FromStringAndSize(line_start, line_len);
}
PyDoc_STRVAR(seek_doc,
"seek(pos, whence=0) -> int. Change stream position.\n"
"\n"
"Seek to byte offset pos relative to position indicated by whence:\n"
" 0 Start of stream (the default). pos should be >= 0;\n"
" 1 Current position - pos may be negative;\n"
" 2 End of stream - pos usually negative.\n"
"Returns the new absolute position.");

/* seek(pos, whence=0): compute the new absolute position from `pos` and
   `whence`, clamp a negative result to 0, store it and return it.
   Raises ValueError for a negative absolute offset or an unknown whence,
   and OverflowError when the sum would exceed PY_SSIZE_T_MAX. */
static PyObject *
bytesio_seek(bytesio *self, PyObject *args)
{
    PyObject *posobj;
    Py_ssize_t target;
    int mode = 0;

    CHECK_CLOSED(self);

    if (!PyArg_ParseTuple(args, "O|i:seek", &posobj, &mode))
        return NULL;

    target = PyNumber_AsSsize_t(posobj, PyExc_OverflowError);
    if (target == -1 && PyErr_Occurred())
        return NULL;

    switch (mode) {
    case 0:
        /* Offset relative to the beginning of the string. */
        if (target < 0) {
            PyErr_Format(PyExc_ValueError,
                         "negative seek value %zd", target);
            return NULL;
        }
        break;

    case 1:
        /* Offset relative to the current position. */
        if (target > PY_SSIZE_T_MAX - self->pos) {
            PyErr_SetString(PyExc_OverflowError,
                            "new position too large");
            return NULL;
        }
        target += self->pos;
        break;

    case 2:
        /* Offset relative to the end of the string. */
        if (target > PY_SSIZE_T_MAX - self->string_size) {
            PyErr_SetString(PyExc_OverflowError,
                            "new position too large");
            return NULL;
        }
        target += self->string_size;
        break;

    default:
        PyErr_Format(PyExc_ValueError,
                     "invalid whence (%i, should be 0, 1 or 2)", mode);
        return NULL;
    }

    /* A relative seek that lands before the start is pinned to 0. */
    self->pos = (target < 0) ? 0 : target;

    return PyLong_FromSsize_t(self->pos);
}
PyDoc_STRVAR(write_doc,
"write(bytes) -> int. Write bytes to file.\n"
"\n"
"Return the number of bytes written.");

/* write(obj): obtain a read-only contiguous buffer view of `obj`, write
   its bytes at the current position via write_bytes(), and return the
   number of bytes written.  An empty buffer skips write_bytes() and
   yields 0. */
static PyObject *
bytesio_write(bytesio *self, PyObject *obj)
{
    Py_buffer view;
    Py_ssize_t written = 0;
    PyObject *result;

    CHECK_CLOSED(self);

    if (PyObject_GetBuffer(obj, &view, PyBUF_CONTIG_RO) < 0)
        return NULL;

    if (view.len != 0)
        written = write_bytes(self, view.buf, view.len);

    /* The view is released on both the success and the failure path. */
    result = (written >= 0) ? PyLong_FromSsize_t(written) : NULL;
    PyBuffer_Release(&view);
    return result;
}
PyDoc_STRVAR(writelines_doc,
"writelines(sequence_of_strings) -> None. Write strings to the file.\n"
"\n"
"Note that newlines are not added. The sequence can be any iterable\n"
"object producing strings. This is equivalent to calling write() for\n"
"each string.");

/* writelines(v): iterate over `v` and pass every item to bytesio_write(),
   stopping at the first failure.  Returns None on success. */
static PyObject *
bytesio_writelines(bytesio *self, PyObject *v)
{
    PyObject *iterator, *element, *written;

    CHECK_CLOSED(self);

    iterator = PyObject_GetIter(v);
    if (iterator == NULL)
        return NULL;

    for (;;) {
        element = PyIter_Next(iterator);
        if (element == NULL)
            break;

        written = bytesio_write(self, element);
        Py_DECREF(element);
        if (written == NULL) {
            Py_DECREF(iterator);
            return NULL;
        }
        Py_DECREF(written);
    }
    Py_DECREF(iterator);

    /* PyIter_Next returns NULL both at exhaustion and on error; only an
       exception being set tells the two apart. */
    if (PyErr_Occurred())
        return NULL;

    Py_RETURN_NONE;
}
PyDoc_STRVAR(close_doc,
"close() -> None. Disable all I/O operations.");

/* close(): release the internal buffer and reset the pointer to NULL.
   Calling it again on an already closed object is a no-op. */
static PyObject *
bytesio_close(bytesio *self)
{
    char *buffer = self->buf;

    if (buffer != NULL) {
        PyMem_Free(buffer);
        self->buf = NULL;
    }

    Py_INCREF(Py_None);
    return Py_None;
}
/* Pickling support.

   Note that only pickle protocol 2 and onward are supported since we use
   extended __reduce__ API of PEP 307 to make BytesIO instances picklable.

   Providing support for protocol < 2 would require the __reduce_ex__ method
   which is notably long-winded when defined properly.

   For BytesIO, the implementation would be similar to the one coded for
   object.__reduce_ex__, but slightly less general. To be more specific, we
   could call bytesio_getstate directly and avoid checking for the presence of
   a fallback __reduce__ method. However, we would still need a __newobj__
   function to use the efficient instance representation of PEP 307.
 */

/* __getstate__(): return the 3-tuple (value, position, dict), where `value`
   is the result of bytesio_getvalue(), `position` is self->pos and `dict`
   is a copy of the instance dictionary, or None when there is none.
   Returns NULL with an exception set on failure. */
static PyObject *
bytesio_getstate(bytesio *self)
{
    PyObject *initvalue = bytesio_getvalue(self);
    PyObject *dict;
    PyObject *state;

    if (initvalue == NULL)
        return NULL;

    if (self->dict == NULL) {
        Py_INCREF(Py_None);
        dict = Py_None;
    }
    else {
        dict = PyDict_Copy(self->dict);
        if (dict == NULL) {
            /* Do not leak the bytes object created above. */
            Py_DECREF(initvalue);
            return NULL;
        }
    }

    /* "O" makes the tuple take its own reference to initvalue (ours is
       released right after); "N" hands our reference to dict over. */
    state = Py_BuildValue("(OnN)", initvalue, self->pos, dict);
    Py_DECREF(initvalue);
    return state;
}
/* __setstate__(state): restore the object from a tuple of at least three
   items (value, position, dict-or-None).  The value is written through
   bytesio_write(), the position must be a non-negative integer, and a dict
   is merged into (or installed as) the instance dictionary.
   Returns None, or NULL with an exception set. */
static PyObject *
bytesio_setstate(bytesio *self, PyObject *state)
{
    PyObject *written;
    PyObject *pos_item;
    PyObject *dict_item;
    Py_ssize_t new_pos;

    assert(state != NULL);

    /* Tuples longer than 3 are accepted so that the state can be extended
       later without breaking backward-compatibility. */
    if (!PyTuple_Check(state) || Py_SIZE(state) < 3) {
        PyErr_Format(PyExc_TypeError,
                     "%.200s.__setstate__ argument should be 3-tuple, got %.200s",
                     Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
        return NULL;
    }

    /* Start from an empty stream; this only matters when __setstate__ is
       called more than once on the same object. */
    self->string_size = 0;
    self->pos = 0;

    /* Load the value.  If state[0] does not support the buffer protocol,
       bytesio_write raises the appropriate TypeError. */
    written = bytesio_write(self, PyTuple_GET_ITEM(state, 0));
    if (written == NULL)
        return NULL;
    Py_DECREF(written);

    /* Validate the position before storing it.  self->pos is assigned
       directly rather than through the seek method, so the checks below
       are what protects the object against erroneous (or malicious)
       input. */
    pos_item = PyTuple_GET_ITEM(state, 1);
    if (!PyIndex_Check(pos_item)) {
        PyErr_Format(PyExc_TypeError,
                     "second item of state must be an integer, not %.200s",
                     Py_TYPE(pos_item)->tp_name);
        return NULL;
    }
    new_pos = PyNumber_AsSsize_t(pos_item, PyExc_OverflowError);
    if (new_pos == -1 && PyErr_Occurred())
        return NULL;
    if (new_pos < 0) {
        PyErr_SetString(PyExc_ValueError,
                        "position value cannot be negative");
        return NULL;
    }
    self->pos = new_pos;

    /* Restore the instance variables, if any were saved. */
    dict_item = PyTuple_GET_ITEM(state, 2);
    if (dict_item == Py_None)
        Py_RETURN_NONE;

    if (!PyDict_Check(dict_item)) {
        PyErr_Format(PyExc_TypeError,
                     "third item of state should be a dict, got a %.200s",
                     Py_TYPE(dict_item)->tp_name);
        return NULL;
    }

    if (self->dict == NULL) {
        Py_INCREF(dict_item);
        self->dict = dict_item;
    }
    else if (PyDict_Update(self->dict, dict_item) < 0) {
        /* An existing dictionary is updated in place rather than being
           replaced wholesale. */
        return NULL;
    }

    Py_RETURN_NONE;
}
/* tp_dealloc: untrack the object from the GC, free the data buffer, drop
   the instance dictionary, clear weak references and finally release the
   object's memory through tp_free.  The order of these steps is kept
   exactly as it was. */
static void
bytesio_dealloc(bytesio *self)
{
    _PyObject_GC_UNTRACK(self);

    if (self->buf) {
        PyMem_Free(self->buf);
        self->buf = NULL;
    }

    Py_CLEAR(self->dict);

    if (self->weakreflist)
        PyObject_ClearWeakRefs((PyObject *) self);

    Py_TYPE(self)->tp_free(self);
}
/* tp_new: allocate a BytesIO instance and give it an initial (zero-sized)
   buffer so that `buf` is non-NULL, i.e. the object starts out open.
   Returns NULL with an exception set on allocation failure. */
static PyObject *
bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    bytesio *obj;

    assert(type != NULL && type->tp_alloc != NULL);

    /* tp_alloc zero-fills the object, so the remaining fields need no
       explicit initialization here. */
    obj = (bytesio *)type->tp_alloc(type, 0);
    if (obj == NULL)
        return NULL;

    obj->buf = (char *)PyMem_Malloc(0);
    if (obj->buf != NULL)
        return (PyObject *)obj;

    Py_DECREF(obj);
    return PyErr_NoMemory();
}
/* tp_init: BytesIO([initial_bytes]).  Resets the stream to empty and, when
   an initial value other than None is supplied, writes it and rewinds the
   position to 0.  Returns 0 on success, -1 with an exception set on error. */
static int
bytesio_init(bytesio *self, PyObject *args, PyObject *kwds)
{
    char *kwlist[] = {"initial_bytes", NULL};
    PyObject *initvalue = NULL;
    PyObject *written;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:BytesIO", kwlist,
                                     &initvalue))
        return -1;

    /* __init__ may be called more than once; always start from scratch. */
    self->string_size = 0;
    self->pos = 0;

    if (initvalue == NULL || initvalue == Py_None)
        return 0;

    written = bytesio_write(self, initvalue);
    if (written == NULL)
        return -1;
    Py_DECREF(written);

    /* The write advanced the position; reading must start at the front. */
    self->pos = 0;
    return 0;
}
/* __sizeof__(): size of the object structure plus, while a buffer is
   attached, the allocated size of that buffer (buf_size). */
static PyObject *
bytesio_sizeof(bytesio *self, void *unused)
{
    Py_ssize_t total = sizeof(bytesio);

    if (self->buf != NULL)
        total += self->buf_size;

    return PyLong_FromSsize_t(total);
}
/* tp_traverse: the instance dictionary is the only object reference held
   by a BytesIO that is reported to the garbage collector. */
static int
bytesio_traverse(bytesio *self, visitproc visit, void *arg)
{
    if (self->dict) {
        int vret = visit((PyObject *)(self->dict), arg);
        if (vret)
            return vret;
    }
    return 0;
}
/* tp_clear: drop the reference to the instance dictionary.  The slot is
   set to NULL before the reference is released. */
static int
bytesio_clear(bytesio *self)
{
    PyObject *held = (PyObject *)(self->dict);

    if (held != NULL) {
        self->dict = NULL;
        Py_DECREF(held);
    }
    return 0;
}
/* Computed attributes of BytesIO.  `closed` has a getter only (the setter
   slot is NULL); its value comes from bytesio_get_closed(). */
static PyGetSetDef bytesio_getsetlist[] = {
{"closed", (getter)bytesio_get_closed, NULL,
"True if the file is closed."},
{NULL}, /* sentinel */
};
/* Method table of BytesIO.  Each entry is {name, C function, calling
   convention flags, docstring}.  readable/seekable/writable share one
   implementation (return_not_closed); the pickling helpers and __sizeof__
   carry no docstring. */
static struct PyMethodDef bytesio_methods[] = {
{"readable", (PyCFunction)return_not_closed, METH_NOARGS, readable_doc},
{"seekable", (PyCFunction)return_not_closed, METH_NOARGS, seekable_doc},
{"writable", (PyCFunction)return_not_closed, METH_NOARGS, writable_doc},
{"close", (PyCFunction)bytesio_close, METH_NOARGS, close_doc},
{"flush", (PyCFunction)bytesio_flush, METH_NOARGS, flush_doc},
{"isatty", (PyCFunction)bytesio_isatty, METH_NOARGS, isatty_doc},
{"tell", (PyCFunction)bytesio_tell, METH_NOARGS, tell_doc},
{"write", (PyCFunction)bytesio_write, METH_O, write_doc},
{"writelines", (PyCFunction)bytesio_writelines, METH_O, writelines_doc},
/* read1 takes its single argument directly (METH_O) and repacks it for
   bytesio_read, which is METH_VARARGS. */
{"read1", (PyCFunction)bytesio_read1, METH_O, read1_doc},
{"readinto", (PyCFunction)bytesio_readinto, METH_VARARGS, readinto_doc},
{"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc},
{"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc},
{"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc},
{"getvalue", (PyCFunction)bytesio_getvalue, METH_NOARGS, getval_doc},
{"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc},
{"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc},
/* Pickling support and memory accounting. */
{"__getstate__", (PyCFunction)bytesio_getstate, METH_NOARGS, NULL},
{"__setstate__", (PyCFunction)bytesio_setstate, METH_O, NULL},
{"__sizeof__", (PyCFunction)bytesio_sizeof, METH_NOARGS, NULL},
{NULL, NULL} /* sentinel */
};
PyDoc_STRVAR(bytesio_doc,
"BytesIO([buffer]) -> object\n"
"\n"
"Create a buffered I/O implementation using an in-memory bytes\n"
"buffer, ready for reading and writing.");
/* Type object for _io.BytesIO.  The initializer is positional: every entry
   must stay in slot order, with unused slots left as 0.  The type is
   subclassable (Py_TPFLAGS_BASETYPE), participates in cyclic GC, supports
   weak references and an instance dictionary (see the two offsetof()
   entries), and is its own iterator (PyObject_SelfIter). */
PyTypeObject PyBytesIO_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_io.BytesIO", /*tp_name*/
sizeof(bytesio), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)bytesio_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
Py_TPFLAGS_HAVE_GC, /*tp_flags*/
bytesio_doc, /*tp_doc*/
(traverseproc)bytesio_traverse, /*tp_traverse*/
(inquiry)bytesio_clear, /*tp_clear*/
0, /*tp_richcompare*/
offsetof(bytesio, weakreflist), /*tp_weaklistoffset*/
PyObject_SelfIter, /*tp_iter*/
(iternextfunc)bytesio_iternext, /*tp_iternext*/
bytesio_methods, /*tp_methods*/
0, /*tp_members*/
bytesio_getsetlist, /*tp_getset*/
0, /*tp_base*/
0, /*tp_dict*/
0, /*tp_descr_get*/
0, /*tp_descr_set*/
offsetof(bytesio, dict), /*tp_dictoffset*/
(initproc)bytesio_init, /*tp_init*/
0, /*tp_alloc*/
bytesio_new, /*tp_new*/
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,913 @@
/*
An implementation of the I/O abstract base classes hierarchy
as defined by PEP 3116 - "New I/O"
Classes defined here: IOBase, RawIOBase.
Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "structmember.h"
#include "_iomodule.h"
/*
* IOBase class, an abstract class
*/
/* Instance layout of _io._IOBase.  Besides the object header it holds only
   the two bookkeeping slots below. */
typedef struct {
PyObject_HEAD
/* Instance dictionary; visited by iobase_traverse and released by
   iobase_clear / iobase_dealloc. */
PyObject *dict;
/* Weak reference list head; cleared in iobase_dealloc when non-NULL. */
PyObject *weakreflist;
} iobase;
/* Class docstring for IOBase.
   NOTE(review): the closing example opens the file with mode 'r' and then
   calls write() on it; the mode was presumably meant to be 'w'.  The text
   is left untouched here because it is a user-visible runtime string. */
PyDoc_STRVAR(iobase_doc,
"The abstract base class for all I/O classes, acting on streams of\n"
"bytes. There is no public constructor.\n"
"\n"
"This class provides dummy implementations for many methods that\n"
"derived classes can override selectively; the default implementations\n"
"represent a file that cannot be read, written or seeked.\n"
"\n"
"Even though IOBase does not declare read, readinto, or write because\n"
"their signatures will vary, implementations and clients should\n"
"consider those methods part of the interface. Also, implementations\n"
"may raise a IOError when operations they do not support are called.\n"
"\n"
"The basic type used for binary data read from or written to a file is\n"
"bytes. bytearrays are accepted too, and in some cases (such as\n"
"readinto) needed. Text I/O classes work with str data.\n"
"\n"
"Note that calling any method (except additional calls to close(),\n"
"which are ignored) on a closed stream should raise a ValueError.\n"
"\n"
"IOBase (and its subclasses) support the iterator protocol, meaning\n"
"that an IOBase object can be iterated over yielding the lines in a\n"
"stream.\n"
"\n"
"IOBase also supports the :keyword:`with` statement. In this example,\n"
"fp is closed after the suite of the with statement is complete:\n"
"\n"
"with open('spam.txt', 'r') as fp:\n"
" fp.write('Spam and eggs!')\n");
/* Use this macro whenever you want to check the internal `closed` status
of the IOBase object rather than the virtual `closed` attribute as returned
by whatever subclass.
The test is purely "does the attribute exist": the value stored under
`__IOBase_closed` is never inspected.  iobase_close() is what sets it. */
#define IS_CLOSED(self) \
PyObject_HasAttrString(self, "__IOBase_closed")
/* Internal methods */
/* Raise the module's "unsupported operation" exception
   (_PyIO_unsupported_operation) with `message` as its text.
   Always returns NULL so callers can write `return iobase_unsupported(...)`. */
static PyObject *
iobase_unsupported(const char *message)
{
PyErr_SetString(_PyIO_unsupported_operation, message);
return NULL;
}
/* Positioning */
PyDoc_STRVAR(iobase_seek_doc,
"Change stream position.\n"
"\n"
"Change the stream position to the given byte offset. The offset is\n"
"interpreted relative to the position indicated by whence. Values\n"
"for whence are:\n"
"\n"
"* 0 -- start of stream (the default); offset should be zero or positive\n"
"* 1 -- current stream position; offset may be negative\n"
"* 2 -- end of stream; offset is usually negative\n"
"\n"
"Return the new absolute position.");
/* Base-class seek(): not implemented here.  Always raises the unsupported
   operation error with the message "seek"; `args` is ignored. */
static PyObject *
iobase_seek(PyObject *self, PyObject *args)
{
return iobase_unsupported("seek");
}
PyDoc_STRVAR(iobase_tell_doc,
"Return current stream position.");

/* tell(): implemented on top of the object's own seek() method by asking
   for offset 0 relative to the current position (whence 1) and returning
   whatever seek() returns. */
static PyObject *
iobase_tell(PyObject *self, PyObject *args)
{
    PyObject *position = PyObject_CallMethod(self, "seek", "ii", 0, 1);

    return position;
}
PyDoc_STRVAR(iobase_truncate_doc,
"Truncate file to size bytes.\n"
"\n"
"File pointer is left unchanged. Size defaults to the current IO\n"
"position as reported by tell(). Returns the new size.");
/* Base-class truncate(): not implemented here.  Always raises the
   unsupported operation error with the message "truncate"; `args` is
   ignored. */
static PyObject *
iobase_truncate(PyObject *self, PyObject *args)
{
return iobase_unsupported("truncate");
}
/* Flush and close methods */

PyDoc_STRVAR(iobase_flush_doc,
"Flush write buffers, if applicable.\n"
"\n"
"This is not implemented for read-only and non-blocking streams.\n");

/* Base-class flush(): does no work of its own.  Returns None while the
   object is open and raises ValueError once the internal closed marker
   (see IS_CLOSED) is present. */
static PyObject *
iobase_flush(PyObject *self, PyObject *args)
{
    /* XXX Should this return the number of bytes written??? */
    if (!IS_CLOSED(self)) {
        Py_RETURN_NONE;
    }

    PyErr_SetString(PyExc_ValueError, "I/O operation on closed file.");
    return NULL;
}
PyDoc_STRVAR(iobase_close_doc,
"Flush and close the IO object.\n"
"\n"
"This method has no effect if the file is already closed.\n");

/* Evaluate the object's (possibly overridden) `closed` attribute as a
   truth value.  This reads the derived attribute, which in most cases is
   *not* the internal __IOBase_closed marker.

   Returns the result of PyObject_IsTrue() (1, 0, or -1 on error).  When
   the attribute lookup itself fails the function returns 0 and does not
   clear the exception that the lookup set. */
static int
iobase_closed(PyObject *self)
{
    int truth;
    PyObject *attr = PyObject_GetAttr(self, _PyIO_str_closed);

    if (attr == NULL)
        return 0;

    truth = PyObject_IsTrue(attr);
    Py_DECREF(attr);
    return truth;
}
/* Getter for IOBase.closed: True exactly when the internal
   __IOBase_closed marker is present on the object. */
static PyObject *
iobase_closed_get(PyObject *self, void *context)
{
    long marker_present = IS_CLOSED(self);

    return PyBool_FromLong(marker_present);
}
/* Raise ValueError if the object's `closed` attribute is true.

   On success the return value is None.  When `args` is Py_True (the
   convention used by C callers in this file) the None is a *borrowed*
   reference that must not be released; otherwise it is a new reference,
   as required for a method result. */
PyObject *
_PyIOBase_check_closed(PyObject *self, PyObject *args)
{
    if (iobase_closed(self)) {
        PyErr_SetString(PyExc_ValueError, "I/O operation on closed file.");
        return NULL;
    }

    if (args != Py_True)
        Py_INCREF(Py_None);
    return Py_None;
}
/* XXX: IOBase thinks it has to maintain its own internal state in
   `__IOBase_closed` and call flush() by itself, but it is redundant with
   whatever behaviour a non-trivial derived class will implement. */

/* close(): flush the object through its own flush() method, then mark it
   closed by setting the `__IOBase_closed` attribute.  A second call is a
   no-op.  The object is marked closed even when flush() fails, in which
   case the flush() exception is the one reported.  Returns None, or NULL
   with an exception set. */
static PyObject *
iobase_close(PyObject *self, PyObject *args)
{
    PyObject *res;
    PyObject *tp, *v, *tb;

    if (IS_CLOSED(self))
        Py_RETURN_NONE;

    res = PyObject_CallMethodObjArgs(self, _PyIO_str_flush, NULL);

    if (res == NULL) {
        /* flush() failed.  Still mark the stream closed, but stash the
           pending exception first so that the attribute store does not
           run with an error set and cannot replace the flush() error. */
        PyErr_Fetch(&tp, &v, &tb);
        if (PyObject_SetAttrString(self, "__IOBase_closed", Py_True) < 0)
            PyErr_Clear();
        PyErr_Restore(tp, v, tb);
        return NULL;
    }

    if (PyObject_SetAttrString(self, "__IOBase_closed", Py_True) < 0) {
        /* Never return a value while an exception is pending. */
        Py_DECREF(res);
        return NULL;
    }

    Py_DECREF(res);
    Py_RETURN_NONE;
}
/* Finalization and garbage collection support */
/* Close the object if it is not closed already, swallowing any error that
   close() raises and preserving whatever exception was pending on entry.

   Returns 0 normally.  Returns -1 only when the function was entered with
   a reference count of 0 (i.e. from a destructor) and the object was
   resurrected while close() ran; the caller must then abandon
   deallocation. */
int
_PyIOBase_finalize(PyObject *self)
{
PyObject *res;
PyObject *tp, *v, *tb;
/* Defaults to "closed" so that close() is skipped when the `closed`
   attribute cannot be read. */
int closed = 1;
int is_zombie;
/* If _PyIOBase_finalize() is called from a destructor, we need to
resurrect the object as calling close() can invoke arbitrary code. */
is_zombie = (Py_REFCNT(self) == 0);
if (is_zombie) {
++Py_REFCNT(self);
}
/* Set aside any exception already in flight; it is put back below. */
PyErr_Fetch(&tp, &v, &tb);
/* If `closed` doesn't exist or can't be evaluated as bool, then the
object is probably in an unusable state, so ignore. */
res = PyObject_GetAttr(self, _PyIO_str_closed);
if (res == NULL)
PyErr_Clear();
else {
closed = PyObject_IsTrue(res);
Py_DECREF(res);
/* closed stays -1 here, so the `closed == 0` test below is false and
   close() is not attempted. */
if (closed == -1)
PyErr_Clear();
}
if (closed == 0) {
res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_close,
NULL);
/* Silencing I/O errors is bad, but printing spurious tracebacks is
equally as bad, and potentially more frequent (because of
shutdown issues). */
if (res == NULL)
PyErr_Clear();
else
Py_DECREF(res);
}
PyErr_Restore(tp, v, tb);
if (is_zombie) {
/* Undo the temporary reference taken above.  A non-zero count at this
   point means some code run by close() kept a reference to self. */
if (--Py_REFCNT(self) != 0) {
/* The object lives again. The following code is taken from
slot_tp_del in typeobject.c. */
Py_ssize_t refcnt = Py_REFCNT(self);
_Py_NewReference(self);
Py_REFCNT(self) = refcnt;
/* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so
* we need to undo that. */
_Py_DEC_REFTOTAL;
/* If Py_TRACE_REFS, _Py_NewReference re-added self to the object
* chain, so no more to do there.
* If COUNT_ALLOCS, the original decref bumped tp_frees, and
* _Py_NewReference bumped tp_allocs: both of those need to be
* undone.
*/
#ifdef COUNT_ALLOCS
--Py_TYPE(self)->tp_frees;
--Py_TYPE(self)->tp_allocs;
#endif
return -1;
}
}
return 0;
}
/* tp_traverse: report the instance dictionary, the only object reference
   held directly by an iobase, to the garbage collector. */
static int
iobase_traverse(iobase *self, visitproc visit, void *arg)
{
    if (self->dict) {
        int vret = visit((PyObject *)(self->dict), arg);
        if (vret)
            return vret;
    }
    return 0;
}
/* tp_clear: finalize (close) the object first, then drop the instance
   dictionary.  Returns -1 without touching the dictionary when
   _PyIOBase_finalize() reports a negative status. */
static int
iobase_clear(iobase *self)
{
    int status = _PyIOBase_finalize((PyObject *) self);

    if (status < 0)
        return -1;

    Py_CLEAR(self->dict);
    return 0;
}
/* Destructor */
/* tp_dealloc: give the object a chance to close itself, then tear it down.
   If finalization reports that the object was resurrected (negative
   return), deallocation is abandoned and the memory is left alone. */
static void
iobase_dealloc(iobase *self)
{
/* NOTE: since IOBaseObject has its own dict, Python-defined attributes
are still available here for close() to use.
However, if the derived class declares a __slots__, those slots are
already gone.
*/
if (_PyIOBase_finalize((PyObject *) self) < 0) {
/* When called from a heap type's dealloc, the type will be
decref'ed on return (see e.g. subtype_dealloc in typeobject.c). */
/* The extra reference taken here offsets that decref, since the
   instance is staying alive. */
if (PyType_HasFeature(Py_TYPE(self), Py_TPFLAGS_HEAPTYPE))
Py_INCREF(Py_TYPE(self));
return;
}
/* Normal teardown: stop GC tracking, clear weak references, release the
   instance dictionary, then free the object through tp_free. */
_PyObject_GC_UNTRACK(self);
if (self->weakreflist != NULL)
PyObject_ClearWeakRefs((PyObject *) self);
Py_CLEAR(self->dict);
Py_TYPE(self)->tp_free((PyObject *) self);
}
/* Inquiry methods */

PyDoc_STRVAR(iobase_seekable_doc,
"Return whether object supports random access.\n"
"\n"
"If False, seek(), tell() and truncate() will raise IOError.\n"
"This method may need to do a test seek().");

/* Base-class seekable(): always False. */
static PyObject *
iobase_seekable(PyObject *self, PyObject *args)
{
    Py_INCREF(Py_False);
    return Py_False;
}
/* Call self.seekable() and raise IOError unless the result is exactly the
   True singleton.

   On success returns True: a borrowed reference when `args` is Py_True
   (the C-caller convention), a new reference otherwise. */
PyObject *
_PyIOBase_check_seekable(PyObject *self, PyObject *args)
{
    PyObject *answer = PyObject_CallMethodObjArgs(self, _PyIO_str_seekable,
                                                  NULL);

    if (answer == NULL)
        return NULL;

    if (answer == Py_True) {
        if (args == Py_True) {
            /* Caller wants a borrowed reference. */
            Py_DECREF(answer);
        }
        return answer;
    }

    Py_DECREF(answer);
    PyErr_SetString(PyExc_IOError, "File or stream is not seekable.");
    return NULL;
}
PyDoc_STRVAR(iobase_readable_doc,
"Return whether object was opened for reading.\n"
"\n"
"If False, read() will raise IOError.");

/* Base-class readable(): always False. */
static PyObject *
iobase_readable(PyObject *self, PyObject *args)
{
    Py_INCREF(Py_False);
    return Py_False;
}
/* May be called with any object */

/* Call self.readable() and raise IOError unless the result is exactly the
   True singleton.

   On success returns True: a borrowed reference when `args` is Py_True
   (the C-caller convention), a new reference otherwise. */
PyObject *
_PyIOBase_check_readable(PyObject *self, PyObject *args)
{
    PyObject *answer = PyObject_CallMethodObjArgs(self, _PyIO_str_readable,
                                                  NULL);

    if (answer == NULL)
        return NULL;

    if (answer == Py_True) {
        if (args == Py_True) {
            /* Caller wants a borrowed reference. */
            Py_DECREF(answer);
        }
        return answer;
    }

    Py_DECREF(answer);
    PyErr_SetString(PyExc_IOError, "File or stream is not readable.");
    return NULL;
}
/* The docstring previously said "read()" here, apparently copied from
   readable(); the operation that fails on a non-writable stream is
   write(). */
PyDoc_STRVAR(iobase_writable_doc,
"Return whether object was opened for writing.\n"
"\n"
"If False, write() will raise IOError.");

/* Base-class writable(): always False. */
static PyObject *
iobase_writable(PyObject *self, PyObject *args)
{
    Py_RETURN_FALSE;
}
/* May be called with any object */

/* Call self.writable() and raise IOError unless the result is exactly the
   True singleton.

   On success returns True: a borrowed reference when `args` is Py_True
   (the C-caller convention), a new reference otherwise. */
PyObject *
_PyIOBase_check_writable(PyObject *self, PyObject *args)
{
    PyObject *answer = PyObject_CallMethodObjArgs(self, _PyIO_str_writable,
                                                  NULL);

    if (answer == NULL)
        return NULL;

    if (answer == Py_True) {
        if (args == Py_True) {
            /* Caller wants a borrowed reference. */
            Py_DECREF(answer);
        }
        return answer;
    }

    Py_DECREF(answer);
    PyErr_SetString(PyExc_IOError, "File or stream is not writable.");
    return NULL;
}
/* Context manager */

/* __enter__(): refuse to enter on a closed stream, otherwise return the
   object itself (new reference). */
static PyObject *
iobase_enter(PyObject *self, PyObject *args)
{
    /* Py_True asks the checker for a borrowed result, so nothing has to be
       released here. */
    if (!_PyIOBase_check_closed(self, Py_True))
        return NULL;

    Py_INCREF(self);
    return self;
}
/* __exit__(*exc_info): close the stream through its close() method and
   return whatever close() returns.  The exception details passed in `args`
   are not examined. */
static PyObject *
iobase_exit(PyObject *self, PyObject *args)
{
    PyObject *outcome = PyObject_CallMethodObjArgs(self, _PyIO_str_close,
                                                   NULL);

    return outcome;
}
/* Lower-level APIs */
/* XXX Should these be present even if unimplemented? */
PyDoc_STRVAR(iobase_fileno_doc,
"Returns underlying file descriptor if one exists.\n"
"\n"
"An IOError is raised if the IO object does not use a file descriptor.\n");
/* Base-class fileno(): not implemented here.  Always raises the
   unsupported operation error with the message "fileno". */
static PyObject *
iobase_fileno(PyObject *self, PyObject *args)
{
return iobase_unsupported("fileno");
}
PyDoc_STRVAR(iobase_isatty_doc,
"Return whether this is an 'interactive' stream.\n"
"\n"
"Return False if it can't be determined.\n");

/* Base-class isatty(): raises ValueError on a closed stream, otherwise
   always answers False. */
static PyObject *
iobase_isatty(PyObject *self, PyObject *args)
{
    /* Py_True requests a borrowed result from the checker. */
    if (!_PyIOBase_check_closed(self, Py_True))
        return NULL;

    Py_INCREF(Py_False);
    return Py_False;
}
/* Readline(s) and writelines */

PyDoc_STRVAR(iobase_readline_doc,
"Read and return a line from the stream.\n"
"\n"
"If limit is specified, at most limit bytes will be read.\n"
"\n"
"The line terminator is always b'\\n' for binary files; for text\n"
"files, the newlines argument to open can be used to select the line\n"
"terminator(s) recognized.\n");

/* Generic readline([limit]) built on the object's read() method and, when
   available, its peek() method.

   Without peek() the stream is read one byte at a time.  With peek(), the
   peeked data is scanned for a newline so that a single read() can fetch
   everything up to and including it.  Reading stops at a newline, at EOF
   (read() returning an empty bytes object), or once the accumulated data
   reaches `limit` bytes.

   Returns a new bytes object, or NULL with an exception set.  IOError is
   raised when peek() or read() returns something other than bytes. */
static PyObject *
iobase_readline(PyObject *self, PyObject *args)
{
    /* For backwards compatibility, a (slowish) readline(). */

    Py_ssize_t limit = -1;
    int has_peek = 0;
    PyObject *buffer, *result;
    Py_ssize_t old_size = -1;

    if (!PyArg_ParseTuple(args, "|O&:readline", &_PyIO_ConvertSsize_t, &limit)) {
        return NULL;
    }

    if (PyObject_HasAttrString(self, "peek"))
        has_peek = 1;

    /* The line is accumulated in a bytearray and converted at the end. */
    buffer = PyByteArray_FromStringAndSize(NULL, 0);
    if (buffer == NULL)
        return NULL;

    while (limit < 0 || Py_SIZE(buffer) < limit) {
        Py_ssize_t nreadahead = 1;
        PyObject *b;

        if (has_peek) {
            PyObject *readahead = PyObject_CallMethod(self, "peek", "i", 1);
            if (readahead == NULL) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto fail;
            }
            if (!PyBytes_Check(readahead)) {
                PyErr_Format(PyExc_IOError,
                             "peek() should have returned a bytes object, "
                             "not '%.200s'", Py_TYPE(readahead)->tp_name);
                Py_DECREF(readahead);
                goto fail;
            }
            if (PyBytes_GET_SIZE(readahead) > 0) {
                /* Count the bytes up to and including the first newline,
                   bounded by the peeked size and (if given) by limit. */
                Py_ssize_t n = 0;
                const char *buf = PyBytes_AS_STRING(readahead);

                while (n < PyBytes_GET_SIZE(readahead)
                       && (limit < 0 || n < limit)) {
                    if (buf[n++] == '\n')
                        break;
                }
                nreadahead = n;
            }
            Py_DECREF(readahead);
        }

        b = PyObject_CallMethod(self, "read", "n", nreadahead);
        if (b == NULL) {
            /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
               when EINTR occurs so we needn't do it ourselves. */
            if (_PyIO_trap_eintr()) {
                continue;
            }
            goto fail;
        }
        if (!PyBytes_Check(b)) {
            PyErr_Format(PyExc_IOError,
                         "read() should have returned a bytes object, "
                         "not '%.200s'", Py_TYPE(b)->tp_name);
            Py_DECREF(b);
            goto fail;
        }
        if (PyBytes_GET_SIZE(b) == 0) {
            /* EOF */
            Py_DECREF(b);
            break;
        }

        old_size = PyByteArray_GET_SIZE(buffer);
        /* The resize can fail (e.g. out of memory); copying into the
           bytearray without checking would write past its storage. */
        if (PyByteArray_Resize(buffer, old_size + PyBytes_GET_SIZE(b)) < 0) {
            Py_DECREF(b);
            goto fail;
        }
        memcpy(PyByteArray_AS_STRING(buffer) + old_size,
               PyBytes_AS_STRING(b), PyBytes_GET_SIZE(b));

        Py_DECREF(b);

        if (PyByteArray_AS_STRING(buffer)[PyByteArray_GET_SIZE(buffer) - 1] == '\n')
            break;
    }

    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(buffer),
                                       PyByteArray_GET_SIZE(buffer));
    Py_DECREF(buffer);
    return result;

fail:
    Py_DECREF(buffer);
    return NULL;
}
/* tp_iter: an IOBase object is its own iterator.  Raises ValueError when
   the stream is closed; otherwise returns a new reference to self. */
static PyObject *
iobase_iter(PyObject *self)
{
    /* Py_True requests a borrowed result from the checker. */
    if (!_PyIOBase_check_closed(self, Py_True))
        return NULL;

    Py_INCREF(self);
    return self;
}
/* tp_iternext: return the next line obtained from the object's readline()
   method.  An empty line ends the iteration (NULL without an exception);
   a failure of readline() or of the length query is propagated (NULL with
   the exception set). */
static PyObject *
iobase_iternext(PyObject *self)
{
    PyObject *line = PyObject_CallMethodObjArgs(self, _PyIO_str_readline, NULL);

    if (line == NULL)
        return NULL;

    /* PyObject_Size() returns -1 with an exception set when the object has
       no length; that must not be handed back as a valid line. */
    if (PyObject_Size(line) <= 0) {
        /* Error or empty */
        Py_DECREF(line);
        return NULL;
    }

    return line;
}
PyDoc_STRVAR(iobase_readlines_doc,
"Return a list of lines from the stream.\n"
"\n"
"hint can be specified to control the number of lines read: no more\n"
"lines will be read if the total size (in bytes/characters) of all\n"
"lines so far exceeds hint.");

/* readlines([hint]): collect the lines produced by iterating over self.

   With no hint, or hint <= 0, every line is collected.  With a positive
   hint, collection stops after the line that makes the running total of
   line lengths exceed `hint`.  Returns a new list, or NULL with an
   exception set. */
static PyObject *
iobase_readlines(PyObject *self, PyObject *args)
{
    Py_ssize_t hint = -1, length = 0;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "|O&:readlines", &_PyIO_ConvertSsize_t, &hint)) {
        return NULL;
    }

    result = PyList_New(0);
    if (result == NULL)
        return NULL;

    if (hint <= 0) {
        /* XXX special-casing this made sense in the Python version in order
           to remove the bytecode interpretation overhead, but it could
           probably be removed here. */
        PyObject *ret = PyObject_CallMethod(result, "extend", "O", self);
        if (ret == NULL) {
            Py_DECREF(result);
            return NULL;
        }
        Py_DECREF(ret);
        return result;
    }

    while (1) {
        Py_ssize_t line_length;
        PyObject *line = PyIter_Next(self);

        if (line == NULL) {
            if (PyErr_Occurred()) {
                Py_DECREF(result);
                return NULL;
            }
            else
                break; /* StopIteration raised */
        }

        if (PyList_Append(result, line) < 0) {
            Py_DECREF(line);
            Py_DECREF(result);
            return NULL;
        }

        line_length = PyObject_Size(line);
        Py_DECREF(line);
        if (line_length < 0) {
            /* The item has no usable length: propagate the error instead
               of folding -1 into the running total. */
            Py_DECREF(result);
            return NULL;
        }

        /* Same test as "length + line_length > hint", written so that the
           addition cannot overflow (length never exceeds hint here). */
        if (line_length > hint - length)
            break;
        length += line_length;
    }

    return result;
}
/* writelines(lines): iterate over `lines` and pass each item to the
   object's write() method.  A write() that fails is retried for as long
   as _PyIO_trap_eintr() reports true; any other failure aborts the loop.
   Raises ValueError on a closed stream.  Returns None on success. */
static PyObject *
iobase_writelines(PyObject *self, PyObject *args)
{
    PyObject *lines, *iter, *res;

    if (!PyArg_ParseTuple(args, "O:writelines", &lines)) {
        return NULL;
    }

    /* Py_True requests a borrowed result from the checker. */
    if (_PyIOBase_check_closed(self, Py_True) == NULL)
        return NULL;

    iter = PyObject_GetIter(lines);
    if (iter == NULL)
        return NULL;

    for (;;) {
        PyObject *line = PyIter_Next(iter);

        if (line == NULL) {
            /* NULL means either exhaustion or an error in the iterator. */
            if (PyErr_Occurred())
                goto fail;
            break;
        }

        do {
            res = PyObject_CallMethodObjArgs(self, _PyIO_str_write, line, NULL);
        } while (res == NULL && _PyIO_trap_eintr());

        Py_DECREF(line);
        if (res == NULL)
            goto fail;
        Py_DECREF(res);
    }

    Py_DECREF(iter);
    Py_RETURN_NONE;

fail:
    Py_DECREF(iter);
    return NULL;
}
/* Method table referenced by PyIOBase_Type.tp_methods.  Each entry is
   {python name, C implementation, calling convention flags, docstring};
   entries with only three fields have no docstring.  The table ends with a
   NULL sentinel entry. */
static PyMethodDef iobase_methods[] = {
{"seek", iobase_seek, METH_VARARGS, iobase_seek_doc},
{"tell", iobase_tell, METH_NOARGS, iobase_tell_doc},
{"truncate", iobase_truncate, METH_VARARGS, iobase_truncate_doc},
{"flush", iobase_flush, METH_NOARGS, iobase_flush_doc},
{"close", iobase_close, METH_NOARGS, iobase_close_doc},
{"seekable", iobase_seekable, METH_NOARGS, iobase_seekable_doc},
{"readable", iobase_readable, METH_NOARGS, iobase_readable_doc},
{"writable", iobase_writable, METH_NOARGS, iobase_writable_doc},
/* Underscore-prefixed checkers exposed to Python without docstrings. */
{"_checkClosed", _PyIOBase_check_closed, METH_NOARGS},
{"_checkSeekable", _PyIOBase_check_seekable, METH_NOARGS},
{"_checkReadable", _PyIOBase_check_readable, METH_NOARGS},
{"_checkWritable", _PyIOBase_check_writable, METH_NOARGS},
{"fileno", iobase_fileno, METH_NOARGS, iobase_fileno_doc},
{"isatty", iobase_isatty, METH_NOARGS, iobase_isatty_doc},
{"__enter__", iobase_enter, METH_NOARGS},
{"__exit__", iobase_exit, METH_VARARGS},
{"readline", iobase_readline, METH_VARARGS, iobase_readline_doc},
{"readlines", iobase_readlines, METH_VARARGS, iobase_readlines_doc},
{"writelines", iobase_writelines, METH_VARARGS},
{NULL, NULL}
};
/* Computed attributes referenced by PyIOBase_Type.tp_getset.  "closed" has a
   getter only (the setter slot is NULL).  Terminated by a NULL entry. */
static PyGetSetDef iobase_getset[] = {
{"closed", (getter)iobase_closed_get, NULL, NULL},
{NULL}
};
/* Type object for "_io._IOBase".  Slots are filled positionally, so the
   order of the entries below must not change.  The type is subclassable
   (Py_TPFLAGS_BASETYPE), participates in garbage collection
   (Py_TPFLAGS_HAVE_GC with traverse/clear handlers), and gives its instances
   a weak reference list and a __dict__ through the two offsetof() entries. */
PyTypeObject PyIOBase_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_io._IOBase", /*tp_name*/
sizeof(iobase), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)iobase_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare */
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_HAVE_GC, /*tp_flags*/
iobase_doc, /* tp_doc */
(traverseproc)iobase_traverse, /* tp_traverse */
(inquiry)iobase_clear, /* tp_clear */
0, /* tp_richcompare */
offsetof(iobase, weakreflist), /* tp_weaklistoffset */
iobase_iter, /* tp_iter */
iobase_iternext, /* tp_iternext */
iobase_methods, /* tp_methods */
0, /* tp_members */
iobase_getset, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
offsetof(iobase, dict), /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
PyType_GenericNew, /* tp_new */
};
/*
* RawIOBase class, Inherits from IOBase.
*/
/* Docstring installed as tp_doc of PyRawIOBase_Type. */
PyDoc_STRVAR(rawiobase_doc,
"Base class for raw binary I/O.");
/*
 * The read() method is implemented by calling readinto(); derived classes
 * that want to support read() only need to implement readinto() as a
 * primitive operation.  In general, readinto() can be more efficient than
 * read().
 *
 * (It would be tempting to also provide an implementation of readinto() in
 * terms of read(), in case the latter is a more suitable primitive operation,
 * but that would lead to nasty recursion in case a subclass doesn't implement
 * either.)
 *
 * read([n]):
 *   - n omitted or negative: delegates to self.readall().
 *   - otherwise: allocates an n-byte bytearray, calls self.readinto() on it
 *     and returns the filled prefix as a bytes object.
 *   - if readinto() returns None, None is returned unchanged.
 * Returns a new reference, or NULL with an exception set.  A readinto()
 * result that is negative or larger than the buffer raises ValueError.
 */
static PyObject *
rawiobase_read(PyObject *self, PyObject *args)
{
    Py_ssize_t n = -1;
    Py_ssize_t nread;
    PyObject *b, *res;

    if (!PyArg_ParseTuple(args, "|n:read", &n)) {
        return NULL;
    }

    if (n < 0)
        return PyObject_CallMethod(self, "readall", NULL);

    /* TODO: allocate a bytes object directly instead and manually construct
       a writable memoryview pointing to it. */
    b = PyByteArray_FromStringAndSize(NULL, n);
    if (b == NULL)
        return NULL;

    res = PyObject_CallMethodObjArgs(self, _PyIO_str_readinto, b, NULL);
    if (res == NULL || res == Py_None) {
        Py_DECREF(b);
        return res;
    }

    nread = PyNumber_AsSsize_t(res, PyExc_ValueError);
    Py_DECREF(res);
    if (nread == -1 && PyErr_Occurred()) {
        Py_DECREF(b);
        return NULL;
    }

    /* readinto() is user-overridable: never trust the reported count.  It
       must fit both the requested size and the bytearray's current size
       (the callee received the bytearray and could have resized it). */
    if (nread < 0 || nread > n || nread > PyByteArray_GET_SIZE(b)) {
        Py_DECREF(b);
        PyErr_Format(PyExc_ValueError,
                     "readinto returned %zd outside buffer size %zd",
                     nread, n);
        return NULL;
    }

    res = PyBytes_FromStringAndSize(PyByteArray_AsString(b), nread);
    Py_DECREF(b);
    return res;
}
PyDoc_STRVAR(rawiobase_readall_doc,
             "Read until EOF, using multiple read() call.");

/* readall(): call self.read(DEFAULT_BUFFER_SIZE) repeatedly and join the
   results.  The loop ends when read() returns an empty bytes object or None.
   If the very first result is None, None itself is returned.  A result that
   is neither None nor bytes raises TypeError.  Returns a new reference, or
   NULL with an exception set. */
static PyObject *
rawiobase_readall(PyObject *self, PyObject *args)
{
    PyObject *pieces, *piece, *joined;
    int status;

    pieces = PyList_New(0);
    if (pieces == NULL)
        return NULL;

    for (;;) {
        piece = PyObject_CallMethod(self, "read", "i", DEFAULT_BUFFER_SIZE);

        if (piece == NULL) {
            /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
               when EINTR occurs so we needn't do it ourselves. */
            if (!_PyIO_trap_eintr()) {
                Py_DECREF(pieces);
                return NULL;
            }
            continue;
        }

        if (piece == Py_None) {
            if (PyList_GET_SIZE(pieces) != 0) {
                /* Some data was already collected: stop and return it. */
                Py_DECREF(piece);
                break;
            }
            /* Nothing collected yet: hand the None reference back. */
            Py_DECREF(pieces);
            return piece;
        }

        if (!PyBytes_Check(piece)) {
            Py_DECREF(pieces);
            Py_DECREF(piece);
            PyErr_SetString(PyExc_TypeError, "read() should return bytes");
            return NULL;
        }

        if (PyBytes_GET_SIZE(piece) == 0) {
            /* EOF */
            Py_DECREF(piece);
            break;
        }

        status = PyList_Append(pieces, piece);
        Py_DECREF(piece);
        if (status < 0) {
            Py_DECREF(pieces);
            return NULL;
        }
    }

    joined = _PyBytes_Join(_PyIO_empty_bytes, pieces);
    Py_DECREF(pieces);
    return joined;
}
/* Method table referenced by PyRawIOBase_Type.tp_methods.  "read" has no
   docstring entry; the table ends with a NULL sentinel. */
static PyMethodDef rawiobase_methods[] = {
{"read", rawiobase_read, METH_VARARGS},
{"readall", rawiobase_readall, METH_NOARGS, rawiobase_readall_doc},
{NULL, NULL}
};
/* Type object for "_io._RawIOBase".  Slots are filled positionally.  Only
   the name, flags, docstring, method table and base type (&PyIOBase_Type)
   are set here; every other slot, including tp_basicsize, is left as 0. */
PyTypeObject PyRawIOBase_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_io._RawIOBase", /*tp_name*/
0, /*tp_basicsize*/
0, /*tp_itemsize*/
0, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare */
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
rawiobase_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
rawiobase_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
&PyIOBase_Type, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
};

View File

@ -0,0 +1,895 @@
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "structmember.h"
#include "_iomodule.h"
/* Implementation note: the buffer is always at least one character longer
than the enclosed string, for proper functioning of _PyIO_find_line_ending.
*/
/* Instance layout of _io.StringIO objects. */
typedef struct {
PyObject_HEAD
Py_UNICODE *buf; /* character storage; allocated in stringio_new() and resized by resize_buffer() */
Py_ssize_t pos; /* current stream position, in characters (what tell() returns) */
Py_ssize_t string_size; /* number of characters of content; getvalue() returns buf[0:string_size] */
size_t buf_size; /* allocated capacity of buf, in Py_UNICODE units */
char ok; /* initialized? */
char closed; /* set to 1 by close(), reset to 0 by __init__ */
char readuniversal; /* set by __init__ when newline is None or "" */
char readtranslate; /* set by __init__ when newline is None */
PyObject *decoder; /* IncrementalNewlineDecoder instance created when readuniversal is set, else NULL */
PyObject *readnl; /* the newline argument as a string object, or NULL when newline is None */
PyObject *writenl; /* replacement for "\n" on write; only set when newline starts with '\r' */
PyObject *dict; /* instance __dict__ (see tp_dictoffset) */
PyObject *weakreflist; /* weak reference list head (see tp_weaklistoffset) */
} stringio;
/* Guard macros for StringIO methods.  Each one raises ValueError and makes
   the *enclosing function* return NULL when the check fails, so they may
   only be used in functions returning a pointer.

   They are wrapped in do { ... } while (0) so that `CHECK_X(self);` is a
   single statement (safe inside an unbraced if/else), and the argument is
   parenthesized so any pointer expression can be passed. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
/* Class docstring installed as tp_doc of PyStringIO_Type. */
PyDoc_STRVAR(stringio_doc,
"Text I/O implementation using an in-memory buffer.\n"
"\n"
"The initial_value argument sets the value of object. The newline\n"
"argument is like the one of TextIOWrapper's constructor.");
/* Change the capacity of a StringIO buffer so that it can hold `size`
   characters plus the one spare slot reserved for line-ending detection.
   The caller must ensure `size` is non-negative.

   Returns 0 on success.  On failure sets OverflowError or MemoryError and
   returns -1; the existing buffer is left untouched. */
static int
resize_buffer(stringio *self, size_t size)
{
    /* Unsigned arithmetic is used throughout because signed integer
       overflow is undefined in C. */
    size_t capacity = self->buf_size;
    size_t wanted;
    Py_UNICODE *grown = NULL;

    assert(self->buf != NULL);

    /* Reserve one more char for line ending detection. */
    wanted = size + 1;

    /* For simplicity, stay in the range of the signed type.  Anyway, Python
       doesn't allow strings to be longer than this. */
    if (wanted > PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_OverflowError,
                        "new buffer size too large");
        return -1;
    }

    if (wanted < capacity / 2) {
        /* Major downsize: shrink to the exact size. */
        capacity = wanted + 1;
    }
    else if (wanted < capacity) {
        /* Already fits in the current allocation. */
        return 0;
    }
    else if (wanted <= capacity * 1.125) {
        /* Moderate upsize: over-allocate, similar to list_resize(). */
        capacity = wanted + (wanted >> 3) + (wanted < 9 ? 3 : 6);
    }
    else {
        /* Major upsize: grow to the exact size. */
        capacity = wanted + 1;
    }

    /* The byte count passed to the allocator must not wrap around. */
    if (capacity > ((size_t)-1) / sizeof(Py_UNICODE)) {
        PyErr_SetString(PyExc_OverflowError,
                        "new buffer size too large");
        return -1;
    }

    grown = (Py_UNICODE *)PyMem_Realloc(self->buf,
                                        capacity * sizeof(Py_UNICODE));
    if (grown == NULL) {
        PyErr_NoMemory();
        return -1;
    }

    self->buf_size = capacity;
    self->buf = grown;
    return 0;
}
/* Internal routine for writing a whole PyUnicode object to the buffer of a
   StringIO object at the current position.

   The text is first passed through self->decoder (when set) and then has
   "\n" replaced by self->writenl (when set).  The buffer is grown as
   needed, any gap left by an earlier seek past the end is zero-filled, and
   self->pos / self->string_size are advanced.

   Returns 0 on success, or -1 with an exception set on error. */
static Py_ssize_t
write_str(stringio *self, PyObject *obj)
{
    Py_UNICODE *str;
    Py_ssize_t len;
    PyObject *decoded = NULL;

    assert(self->buf != NULL);
    assert(self->pos >= 0);

    if (self->decoder != NULL) {
        decoded = _PyIncrementalNewlineDecoder_decode(
            self->decoder, obj, 1 /* always final */);
        /* Bail out right away: a NULL result must never reach
           PyUnicode_Replace() or Py_DECREF() below. */
        if (decoded == NULL)
            return -1;
    }
    else {
        decoded = obj;
        Py_INCREF(decoded);
    }

    if (self->writenl) {
        PyObject *translated = PyUnicode_Replace(
            decoded, _PyIO_str_nl, self->writenl, -1);
        Py_DECREF(decoded);
        decoded = translated;
        if (decoded == NULL)
            return -1;
    }

    assert(PyUnicode_Check(decoded));
    str = PyUnicode_AS_UNICODE(decoded);
    len = PyUnicode_GET_SIZE(decoded);
    assert(len >= 0);

    /* This overflow check is not strictly necessary.  However, it spares us
       from dealing with funky things like comparing an unsigned and a
       signed integer. */
    if (self->pos > PY_SSIZE_T_MAX - len) {
        PyErr_SetString(PyExc_OverflowError,
                        "new position too large");
        goto fail;
    }

    if (self->pos + len > self->string_size) {
        if (resize_buffer(self, self->pos + len) < 0)
            goto fail;
    }

    if (self->pos > self->string_size) {
        /* In case of overseek, pad with null characters the buffer region
           between the end of stream and the current position.

          0   lo      string_size                           hi
          |   |<---used--->|<----------available----------->|
          |   |            <--to pad-->|<---to write--->    |
          0   buf                   position
        */
        memset(self->buf + self->string_size, '\0',
               (self->pos - self->string_size) * sizeof(Py_UNICODE));
    }

    /* Copy the data to the internal buffer, overwriting some of the
       existing data if self->pos < self->string_size. */
    memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE));
    self->pos += len;

    /* Set the new length of the internal string if it has changed. */
    if (self->string_size < self->pos) {
        self->string_size = self->pos;
    }

    Py_DECREF(decoded);
    return 0;

fail:
    Py_DECREF(decoded);
    return -1;
}
PyDoc_STRVAR(stringio_getvalue_doc,
    "Retrieve the entire contents of the object.");

/* getvalue(): build a new unicode object from the first string_size
   characters of the buffer.  Fails with ValueError when the object is
   uninitialized or closed. */
static PyObject *
stringio_getvalue(stringio *self)
{
    PyObject *contents;

    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    contents = PyUnicode_FromUnicode(self->buf, self->string_size);
    return contents;
}
PyDoc_STRVAR(stringio_tell_doc,
    "Tell the current file position.");

/* tell(): return self->pos as a Python long.  Fails with ValueError when
   the object is uninitialized or closed. */
static PyObject *
stringio_tell(stringio *self)
{
    Py_ssize_t where;

    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    where = self->pos;
    return PyLong_FromSsize_t(where);
}
PyDoc_STRVAR(stringio_read_doc,
    "Read at most n characters, returned as a string.\n"
    "\n"
    "If the argument is negative or omitted, read until EOF\n"
    "is reached. Return an empty string at EOF.\n");

/* read([n]): return up to n characters starting at the current position and
   advance the position by the number of characters returned.  None, a
   missing argument or a negative n mean "everything up to the end". */
static PyObject *
stringio_read(stringio *self, PyObject *args)
{
    PyObject *arg = Py_None;
    Py_ssize_t size, remaining;
    Py_UNICODE *chunk;

    CHECK_INITIALIZED(self);
    if (!PyArg_ParseTuple(args, "|O:read", &arg))
        return NULL;
    CHECK_CLOSED(self);

    if (!PyNumber_Check(arg)) {
        if (arg != Py_None) {
            PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
                         Py_TYPE(arg)->tp_name);
            return NULL;
        }
        /* Read until EOF is reached, by default. */
        size = -1;
    }
    else {
        size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
        if (size == -1 && PyErr_Occurred())
            return NULL;
    }

    /* Clamp the request to what is left; the remainder is negative when the
       position was moved past the end, in which case nothing is read. */
    remaining = self->string_size - self->pos;
    if (size < 0 || size > remaining)
        size = (remaining < 0) ? 0 : remaining;

    chunk = self->buf + self->pos;
    self->pos += size;
    return PyUnicode_FromUnicode(chunk, size);
}
/* Shared implementation of readline() and iteration.

   Returns the text from the current position up to the line ending located
   by _PyIO_find_line_ending(), or up to `limit` characters / end of content
   when none is found, and advances the position past what is returned.  A
   negative `limit` means "no limit". */
static PyObject *
_stringio_readline(stringio *self, Py_ssize_t limit)
{
    Py_UNICODE *first, *stop, saved;
    Py_ssize_t available, taken, consumed;

    /* In case of overseek, return the empty string */
    if (self->pos >= self->string_size)
        return PyUnicode_FromString("");

    available = self->string_size - self->pos;
    first = self->buf + self->pos;
    if (limit < 0 || limit > available)
        limit = available;
    stop = first + limit;

    /* Temporarily NUL-terminate the search window; the buffer always has
       room for this extra character.  The original character is restored
       right after the search. */
    saved = *stop;
    *stop = '\0';
    taken = _PyIO_find_line_ending(
        self->readtranslate, self->readuniversal, self->readnl,
        first, stop, &consumed);
    *stop = saved;

    /* If we haven't found any line ending, we just return everything
       (`consumed` is ignored). */
    if (taken < 0)
        taken = limit;

    self->pos += taken;
    return PyUnicode_FromUnicode(first, taken);
}
PyDoc_STRVAR(stringio_readline_doc,
    "Read until newline or EOF.\n"
    "\n"
    "Returns an empty string if EOF is hit immediately.\n");

/* readline([limit]): validate the optional limit argument (a number or
   None) and delegate to _stringio_readline(). */
static PyObject *
stringio_readline(stringio *self, PyObject *args)
{
    Py_ssize_t limit = -1;
    PyObject *arg = Py_None;

    CHECK_INITIALIZED(self);
    if (!PyArg_ParseTuple(args, "|O:readline", &arg))
        return NULL;
    CHECK_CLOSED(self);

    if (!PyNumber_Check(arg)) {
        /* Only None is accepted besides numbers; it keeps limit at -1. */
        if (arg != Py_None) {
            PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
                         Py_TYPE(arg)->tp_name);
            return NULL;
        }
    }
    else {
        limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
        if (limit == -1 && PyErr_Occurred())
            return NULL;
    }

    return _stringio_readline(self, limit);
}
/* tp_iternext: return the next line, or NULL without an exception once an
   empty line signals that the end has been reached.  Exact StringIO
   instances take the internal fast path; subclasses go through their
   readline() method, whose result must be a unicode object. */
static PyObject *
stringio_iternext(stringio *self)
{
    PyObject *line;

    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    if (Py_TYPE(self) != &PyStringIO_Type) {
        /* XXX is subclassing StringIO really supported? */
        line = PyObject_CallMethodObjArgs((PyObject *)self,
                                          _PyIO_str_readline, NULL);
        if (line != NULL && !PyUnicode_Check(line)) {
            PyErr_Format(PyExc_IOError,
                         "readline() should have returned an str object, "
                         "not '%.200s'", Py_TYPE(line)->tp_name);
            Py_DECREF(line);
            return NULL;
        }
    }
    else {
        /* Skip method call overhead for speed */
        line = _stringio_readline(self, -1);
    }

    if (line == NULL)
        return NULL;

    if (PyUnicode_GET_SIZE(line) != 0)
        return line;

    /* Reached EOF */
    Py_DECREF(line);
    return NULL;
}
PyDoc_STRVAR(stringio_truncate_doc,
    "Truncate size to pos.\n"
    "\n"
    "The pos argument defaults to the current file position, as\n"
    "returned by tell().  The current file position is unchanged.\n"
    "Returns the new absolute position.\n");

/* truncate([pos]): shrink the content to `pos` characters when that is
   smaller than the current content size; otherwise nothing is changed.
   Returns the requested size as a Python long. */
static PyObject *
stringio_truncate(stringio *self, PyObject *args)
{
    PyObject *arg = Py_None;
    Py_ssize_t size;

    CHECK_INITIALIZED(self);
    if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
        return NULL;
    CHECK_CLOSED(self);

    if (!PyNumber_Check(arg)) {
        if (arg != Py_None) {
            PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
                         Py_TYPE(arg)->tp_name);
            return NULL;
        }
        /* Truncate to current position if no argument is passed. */
        size = self->pos;
    }
    else {
        size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
        if (size == -1 && PyErr_Occurred())
            return NULL;
    }

    if (size < 0) {
        PyErr_Format(PyExc_ValueError,
                     "Negative size value %zd", size);
        return NULL;
    }

    if (size < self->string_size) {
        if (resize_buffer(self, size) < 0)
            return NULL;
        self->string_size = size;
    }

    return PyLong_FromSsize_t(size);
}
PyDoc_STRVAR(stringio_seek_doc,
    "Change stream position.\n"
    "\n"
    "Seek to character offset pos relative to position indicated by whence:\n"
    "    0  Start of stream (the default).  pos should be >= 0;\n"
    "    1  Current position - pos must be 0;\n"
    "    2  End of stream - pos must be 0.\n"
    "Returns the new absolute position.\n");

/* seek(pos[, whence]): set the stream position and return it.
     whence 0: absolute offset, which must not be negative.
     whence 1: position is left unchanged (pos must be 0).
     whence 2: position becomes the end of the content (pos must be 0). */
static PyObject *
stringio_seek(stringio *self, PyObject *args)
{
    PyObject *posobj;
    Py_ssize_t target;
    int mode = 0;

    CHECK_INITIALIZED(self);
    if (!PyArg_ParseTuple(args, "O|i:seek", &posobj, &mode))
        return NULL;

    target = PyNumber_AsSsize_t(posobj, PyExc_OverflowError);
    if (target == -1 && PyErr_Occurred())
        return NULL;

    CHECK_CLOSED(self);

    switch (mode) {
    case 0:
        if (target < 0) {
            PyErr_Format(PyExc_ValueError,
                         "Negative seek position %zd", target);
            return NULL;
        }
        break;
    case 1:
    case 2:
        if (target != 0) {
            PyErr_SetString(PyExc_IOError,
                            "Can't do nonzero cur-relative seeks");
            return NULL;
        }
        target = (mode == 1) ? self->pos : self->string_size;
        break;
    default:
        PyErr_Format(PyExc_ValueError,
                     "Invalid whence (%i, should be 0, 1 or 2)", mode);
        return NULL;
    }

    self->pos = target;
    return PyLong_FromSsize_t(self->pos);
}
PyDoc_STRVAR(stringio_write_doc,
    "Write string to file.\n"
    "\n"
    "Returns the number of characters written, which is always equal to\n"
    "the length of the string.\n");

/* write(s): store the unicode object `obj` at the current position through
   write_str() and return the length of `obj` as a Python long.  Empty
   strings skip write_str() entirely. */
static PyObject *
stringio_write(stringio *self, PyObject *obj)
{
    Py_ssize_t nchars;

    CHECK_INITIALIZED(self);
    if (!PyUnicode_Check(obj)) {
        PyErr_Format(PyExc_TypeError, "unicode argument expected, got '%s'",
                     Py_TYPE(obj)->tp_name);
        return NULL;
    }
    CHECK_CLOSED(self);

    nchars = PyUnicode_GET_SIZE(obj);
    if (nchars > 0) {
        if (write_str(self, obj) < 0)
            return NULL;
    }

    return PyLong_FromSsize_t(nchars);
}
PyDoc_STRVAR(stringio_close_doc,
    "Close the IO object. Attempting any further operation after the\n"
    "object is closed will raise a ValueError.\n"
    "\n"
    "This method has no effect if the file is already closed.\n");

/* close(): mark the object closed, shrink the buffer to its minimum size
   and drop the newline-handling helper objects.  Returns None, or NULL if
   shrinking the buffer fails (the closed flag is already set by then). */
static PyObject *
stringio_close(stringio *self)
{
    int status;

    self->closed = 1;

    /* Free up some memory */
    status = resize_buffer(self, 0);
    if (status < 0)
        return NULL;

    Py_CLEAR(self->readnl);
    Py_CLEAR(self->writenl);
    Py_CLEAR(self->decoder);

    Py_INCREF(Py_None);
    return Py_None;
}
/* tp_traverse handler: reports the instance dictionary to the garbage
   collector.  Py_VISIT relies on the parameters being named `visit` and
   `arg`.  readnl, writenl and decoder are not visited here. */
static int
stringio_traverse(stringio *self, visitproc visit, void *arg)
{
Py_VISIT(self->dict);
return 0;
}
/* tp_clear handler: drops the reference to the instance dictionary, the
   only member reported by stringio_traverse().  Always returns 0. */
static int
stringio_clear(stringio *self)
{
Py_CLEAR(self->dict);
return 0;
}
/* tp_dealloc handler: untrack the object from the garbage collector, free
   the character buffer, release every owned reference, clear weak
   references and finally hand the memory back through tp_free. */
static void
stringio_dealloc(stringio *self)
{
    _PyObject_GC_UNTRACK(self);
    self->ok = 0;

    /* PyMem_Free() accepts NULL, so no guard is needed. */
    PyMem_Free(self->buf);
    self->buf = NULL;

    Py_CLEAR(self->readnl);
    Py_CLEAR(self->writenl);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->dict);

    if (self->weakreflist != NULL) {
        PyObject_ClearWeakRefs((PyObject *) self);
    }

    Py_TYPE(self)->tp_free(self);
}
/* tp_new handler: allocate the instance and give it a zero-length buffer so
   that self->buf is never NULL afterwards.  `args` and `kwds` are not
   examined here.  Returns the new object, or NULL with an exception set. */
static PyObject *
stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    stringio *self;

    assert(type != NULL && type->tp_alloc != NULL);

    self = (stringio *)type->tp_alloc(type, 0);
    if (self != NULL) {
        /* tp_alloc initializes all the fields to zero.  So we don't have to
           initialize them here. */
        self->buf = (Py_UNICODE *)PyMem_Malloc(0);
        if (self->buf == NULL) {
            Py_DECREF(self);
            return PyErr_NoMemory();
        }
    }

    return (PyObject *)self;
}
/* tp_init handler: StringIO(initial_value=None, newline="\n").

   Validates the arguments, rebuilds the newline-handling state (readnl,
   writenl, decoder and the readuniversal/readtranslate flags), then resets
   the buffer and writes `initial_value` into it when one is given.

   Returns 0 on success, or -1 with an exception set.  self->ok stays 0 when
   initialization fails after argument validation. */
static int
stringio_init(stringio *self, PyObject *args, PyObject *kwds)
{
    char *kwlist[] = {"initial_value", "newline", NULL};
    PyObject *value = NULL;
    char *newline = "\n";

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oz:__init__", kwlist,
                                     &value, &newline))
        return -1;

    /* Only None, "", "\n", "\r" and "\r\n" are legal newline values. */
    if (newline && newline[0] != '\0'
        && !(newline[0] == '\n' && newline[1] == '\0')
        && !(newline[0] == '\r' && newline[1] == '\0')
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
        PyErr_Format(PyExc_ValueError,
                     "illegal newline value: %s", newline);
        return -1;
    }
    if (value && value != Py_None && !PyUnicode_Check(value)) {
        PyErr_Format(PyExc_TypeError,
                     "initial_value must be unicode or None, not %.200s",
                     Py_TYPE(value)->tp_name);
        return -1;
    }

    self->ok = 0;

    Py_CLEAR(self->readnl);
    Py_CLEAR(self->writenl);
    Py_CLEAR(self->decoder);

    if (newline) {
        self->readnl = PyString_FromString(newline);
        if (self->readnl == NULL)
            return -1;
    }
    self->readuniversal = (newline == NULL || newline[0] == '\0');
    self->readtranslate = (newline == NULL);
    /* If newline == "", we don't translate anything.
       If newline == "\n" or newline == None, we translate to "\n", which is
       a no-op.
       (for newline == None, TextIOWrapper translates to os.linesep, but it
       is pointless for StringIO)
    */
    if (newline != NULL && newline[0] == '\r') {
        self->writenl = PyUnicode_FromString(newline);
        /* Without this check a failed allocation would leave an exception
           pending while __init__ reports success. */
        if (self->writenl == NULL)
            return -1;
    }

    if (self->readuniversal) {
        self->decoder = PyObject_CallFunction(
            (PyObject *)&PyIncrementalNewlineDecoder_Type,
            "Oi", Py_None, (int) self->readtranslate);
        if (self->decoder == NULL)
            return -1;
    }

    /* Now everything is set up, resize buffer to size of initial value,
       and copy it */
    self->string_size = 0;
    if (value && value != Py_None) {
        Py_ssize_t len = PyUnicode_GetSize(value);
        /* This is a heuristic, for newline translation might change
           the string length. */
        if (resize_buffer(self, len) < 0)
            return -1;
        self->pos = 0;
        if (write_str(self, value) < 0)
            return -1;
    }
    else {
        if (resize_buffer(self, 0) < 0)
            return -1;
    }
    self->pos = 0;

    self->closed = 0;
    self->ok = 1;

    return 0;
}
/* Properties and pseudo-properties */

PyDoc_STRVAR(stringio_readable_doc,
"readable() -> bool. Returns True if the IO object can be read.");

PyDoc_STRVAR(stringio_writable_doc,
"writable() -> bool. Returns True if the IO object can be written.");

PyDoc_STRVAR(stringio_seekable_doc,
"seekable() -> bool. Returns True if the IO object can be seeked.");

/* seekable(): True for every initialized, open StringIO; ValueError
   otherwise.  `args` is unused. */
static PyObject *
stringio_seekable(stringio *self, PyObject *args)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    Py_INCREF(Py_True);
    return Py_True;
}
/* readable(): True for every initialized, open StringIO; ValueError
   otherwise.  `args` is unused. */
static PyObject *
stringio_readable(stringio *self, PyObject *args)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    Py_INCREF(Py_True);
    return Py_True;
}
/* writable(): True for every initialized, open StringIO; ValueError
   otherwise.  `args` is unused. */
static PyObject *
stringio_writable(stringio *self, PyObject *args)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    Py_INCREF(Py_True);
    return Py_True;
}
/* Pickling support.

   The implementation of __getstate__ is similar to the one for BytesIO,
   except that we also save the newline parameter.  For __setstate__ and
   unlike BytesIO, we call __init__ to restore the object's state.  Doing so
   allows us to avoid decoding the complex newline state while keeping the
   object representation compact.

   See the comment in bytesio.c regarding which pickle protocols are
   supported.
*/

/* __getstate__(): return the tuple
       (contents, readnl-or-None, position, copy-of-__dict__-or-None).
   Returns a new reference, or NULL with an exception set. */
static PyObject *
stringio_getstate(stringio *self)
{
    PyObject *initvalue = stringio_getvalue(self);
    PyObject *dict;
    PyObject *state;

    if (initvalue == NULL)
        return NULL;

    if (self->dict == NULL) {
        Py_INCREF(Py_None);
        dict = Py_None;
    }
    else {
        dict = PyDict_Copy(self->dict);
        if (dict == NULL) {
            /* Do not leak the value fetched above. */
            Py_DECREF(initvalue);
            return NULL;
        }
    }

    /* "N" hands our reference to `dict` over to the tuple; "O" takes a new
       reference to `initvalue`, so ours is released below. */
    state = Py_BuildValue("(OOnN)", initvalue,
                          self->readnl ? self->readnl : Py_None,
                          self->pos, dict);
    Py_DECREF(initvalue);
    return state;
}
/* __setstate__(state): restore an object from the tuple produced by
   __getstate__: (contents, newline, position, dict-or-None).

   The first two items are passed to __init__; the buffer is then overwritten
   with the raw contents, the position is restored and the instance
   dictionary is updated.  Returns None, or NULL with an exception set
   (TypeError for malformed state, ValueError for a negative position). */
static PyObject *
stringio_setstate(stringio *self, PyObject *state)
{
    PyObject *initarg;
    PyObject *value;
    PyObject *position_obj;
    PyObject *dict;
    Py_ssize_t pos;

    assert(state != NULL);
    CHECK_CLOSED(self);

    /* We allow the state tuple to be longer than 4, because we may need
       someday to extend the object's state without breaking
       backward-compatibility. */
    if (!PyTuple_Check(state) || Py_SIZE(state) < 4) {
        PyErr_Format(PyExc_TypeError,
                     "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
                     Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
        return NULL;
    }

    /* Initialize the object's state. */
    initarg = PyTuple_GetSlice(state, 0, 2);
    if (initarg == NULL)
        return NULL;
    if (stringio_init(self, initarg, NULL) < 0) {
        Py_DECREF(initarg);
        return NULL;
    }
    Py_DECREF(initarg);

    /* __init__ accepts None as initial_value, but the raw buffer copy below
       needs a real unicode object: reject anything else instead of reading
       the object through the unchecked PyUnicode_AS_UNICODE macro. */
    value = PyTuple_GET_ITEM(state, 0);
    if (!PyUnicode_Check(value)) {
        PyErr_Format(PyExc_TypeError,
                     "first item of state must be unicode, got %.200s",
                     Py_TYPE(value)->tp_name);
        return NULL;
    }

    /* Restore the buffer state.  Even if __init__ did initialize the buffer,
       we have to initialize it again since __init__ may translate the
       newlines in the initial_value string.  We clearly do not want that
       because the string value in the state tuple has already been
       translated once by __init__.  So we do not take any chance and replace
       the object's buffer completely. */
    {
        Py_UNICODE *buf = PyUnicode_AS_UNICODE(value);
        Py_ssize_t bufsize = PyUnicode_GET_SIZE(value);
        if (resize_buffer(self, bufsize) < 0)
            return NULL;
        memcpy(self->buf, buf, bufsize * sizeof(Py_UNICODE));
        self->string_size = bufsize;
    }

    /* Set carefully the position value.  Alternatively, we could use the
       seek method instead of modifying self->pos directly to better protect
       the object's internal state against erroneous (or malicious) inputs. */
    position_obj = PyTuple_GET_ITEM(state, 2);
    if (!PyIndex_Check(position_obj)) {
        PyErr_Format(PyExc_TypeError,
                     "third item of state must be an integer, got %.200s",
                     Py_TYPE(position_obj)->tp_name);
        return NULL;
    }
    pos = PyNumber_AsSsize_t(position_obj, PyExc_OverflowError);
    if (pos == -1 && PyErr_Occurred())
        return NULL;
    if (pos < 0) {
        PyErr_SetString(PyExc_ValueError,
                        "position value cannot be negative");
        return NULL;
    }
    self->pos = pos;

    /* Set the dictionary of the instance variables. */
    dict = PyTuple_GET_ITEM(state, 3);
    if (dict != Py_None) {
        if (!PyDict_Check(dict)) {
            PyErr_Format(PyExc_TypeError,
                         "fourth item of state should be a dict, got a %.200s",
                         Py_TYPE(dict)->tp_name);
            return NULL;
        }
        if (self->dict) {
            /* Alternatively, we could replace the internal dictionary
               completely.  However, it seems more practical to just update
               it. */
            if (PyDict_Update(self->dict, dict) < 0)
                return NULL;
        }
        else {
            Py_INCREF(dict);
            self->dict = dict;
        }
    }

    Py_RETURN_NONE;
}
/* Getter for the "closed" attribute: True once close() has been called.
   Only requires the object to be initialized.  `context` is unused. */
static PyObject *
stringio_closed(stringio *self, void *context)
{
    PyObject *flag;

    CHECK_INITIALIZED(self);

    flag = self->closed ? Py_True : Py_False;
    Py_INCREF(flag);
    return flag;
}
/* Getter for the "line_buffering" pseudo-attribute: always False for an
   initialized, open StringIO.  `context` is unused. */
static PyObject *
stringio_line_buffering(stringio *self, void *context)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    Py_INCREF(Py_False);
    return Py_False;
}
/* Getter for the "newlines" attribute: forwards to the decoder's attribute
   of the same name, or returns None when no decoder is in use.  `context`
   is unused. */
static PyObject *
stringio_newlines(stringio *self, void *context)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    if (self->decoder != NULL)
        return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);

    Py_INCREF(Py_None);
    return Py_None;
}
/* Method table referenced by PyStringIO_Type.tp_methods.  Implementations
   take a `stringio *` first argument and are therefore cast to PyCFunction.
   "write" and "__setstate__" use METH_O (a single object argument); the
   pickling entries carry no docstring. */
static struct PyMethodDef stringio_methods[] = {
{"close", (PyCFunction)stringio_close, METH_NOARGS, stringio_close_doc},
{"getvalue", (PyCFunction)stringio_getvalue, METH_NOARGS, stringio_getvalue_doc},
{"read", (PyCFunction)stringio_read, METH_VARARGS, stringio_read_doc},
{"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
{"tell", (PyCFunction)stringio_tell, METH_NOARGS, stringio_tell_doc},
{"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
{"seek", (PyCFunction)stringio_seek, METH_VARARGS, stringio_seek_doc},
{"write", (PyCFunction)stringio_write, METH_O, stringio_write_doc},
{"seekable", (PyCFunction)stringio_seekable, METH_NOARGS, stringio_seekable_doc},
{"readable", (PyCFunction)stringio_readable, METH_NOARGS, stringio_readable_doc},
{"writable", (PyCFunction)stringio_writable, METH_NOARGS, stringio_writable_doc},
{"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
{"__setstate__", (PyCFunction)stringio_setstate, METH_O},
{NULL, NULL} /* sentinel */
};
/* Computed attributes referenced by PyStringIO_Type.tp_getset.  Every entry
   has a getter only (the setter slot is NULL).  Terminated by a NULL
   entry. */
static PyGetSetDef stringio_getset[] = {
{"closed", (getter)stringio_closed, NULL, NULL},
{"newlines", (getter)stringio_newlines, NULL, NULL},
/* (following comments straight off of the original Python wrapper:)
XXX Cruft to support the TextIOWrapper API. This would only
be meaningful if StringIO supported the buffer attribute.
Hopefully, a better solution, than adding these pseudo-attributes,
will be found.
*/
{"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
{NULL}
};
/* Type object for "_io.StringIO".  Slots are filled positionally, so the
   order of the entries below must not change.  The type is subclassable and
   GC-enabled, supplies its own tp_new/tp_init/tp_dealloc, and gives its
   instances a weak reference list and a __dict__ through the two offsetof()
   entries.  tp_iternext is set while tp_iter is left as 0 here. */
PyTypeObject PyStringIO_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_io.StringIO", /*tp_name*/
sizeof(stringio), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)stringio_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_HAVE_GC, /*tp_flags*/
stringio_doc, /*tp_doc*/
(traverseproc)stringio_traverse, /*tp_traverse*/
(inquiry)stringio_clear, /*tp_clear*/
0, /*tp_richcompare*/
offsetof(stringio, weakreflist), /*tp_weaklistoffset*/
0, /*tp_iter*/
(iternextfunc)stringio_iternext, /*tp_iternext*/
stringio_methods, /*tp_methods*/
0, /*tp_members*/
stringio_getset, /*tp_getset*/
0, /*tp_base*/
0, /*tp_dict*/
0, /*tp_descr_get*/
0, /*tp_descr_set*/
offsetof(stringio, dict), /*tp_dictoffset*/
(initproc)stringio_init, /*tp_init*/
0, /*tp_alloc*/
stringio_new, /*tp_new*/
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,758 @@
/***********************************************************
Copyright (C) 1997, 2002, 2003 Martin von Loewis
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies.
This software comes with no warranty. Use at your own risk.
******************************************************************/
#include "Python.h"
#include <stdio.h>
#include <locale.h>
#include <string.h>
#include <ctype.h>
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_LANGINFO_H
#include <langinfo.h>
#endif
#ifdef HAVE_LIBINTL_H
#include <libintl.h>
#endif
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
#if defined(MS_WINDOWS)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#ifdef RISCOS
char *strdup(const char *);
#endif
/* Docstring text; presumably used as the module docstring (the use site is
   not in this part of the file). */
PyDoc_STRVAR(locale__doc__, "Support for POSIX locales.");
/* Exception object passed to PyErr_SetString() when a locale operation
   fails (see PyLocale_setlocale).  It is assigned elsewhere in the file. */
static PyObject *Error;
/* support functions for formatting floating point numbers */
/* Docstring for the setlocale() function defined further down. */
PyDoc_STRVAR(setlocale__doc__,
"(integer,string=None) -> string. Activates/queries locale processing.");
/* Convert a C grouping string (as found in struct lconv) into a Python list
   of ints.

   The grouping is terminated by either 0 or CHAR_MAX; the terminator itself
   is included as the last list element.  An empty string yields an empty
   list.  Returns a new reference, or NULL with an exception set. */
static PyObject*
copy_grouping(char* s)
{
    int i;
    PyObject *result, *val;

    if (s[0] == '\0')
        /* empty string: no grouping at all */
        return PyList_New(0);

    /* Count the entries that precede the terminator. */
    for (i = 0; s[i] != '\0' && s[i] != CHAR_MAX; i++)
        ; /* nothing */

    /* One extra slot for the terminator itself. */
    result = PyList_New(i+1);
    if (!result)
        return NULL;

    i = -1;
    do {
        i++;
        val = PyInt_FromLong(s[i]);
        if (val == NULL) {
            Py_DECREF(result);
            return NULL;
        }
        /* The list is freshly created with exactly enough empty slots, so
           the unchecked macro is appropriate.  It takes over our reference
           and cannot fail, which avoids releasing `val` a second time after
           a failed PyList_SetItem() (that call consumes the reference even
           on error). */
        PyList_SET_ITEM(result, i, val);
    } while (s[i] != '\0' && s[i] != CHAR_MAX);

    return result;
}
/* Build a byte string holding, in ascending order, every character code in
   0..255 accepted by one <ctype.h> classifier under the current locale.
   `kind` selects the classifier: 'u' isupper, 'l' islower, 'a' isalpha.
   Returns a new reference, or NULL on failure. */
static PyObject *
locale_ctype_members(int kind)
{
    unsigned char members[256];
    int count = 0;
    int c;

    for (c = 0; c < 256; c++) {
        int accepted;
        switch (kind) {
        case 'u':
            accepted = isupper(c);
            break;
        case 'l':
            accepted = islower(c);
            break;
        default:
            accepted = isalpha(c);
            break;
        }
        if (accepted)
            members[count++] = c;
    }
    return PyString_FromStringAndSize((const char *)members, count);
}

/* Refresh the locale-dependent character tables of the already-imported
   "string" and "strop" modules: "uppercase" and "lowercase" in both,
   "letters" in "string" only.  Does nothing when neither module is loaded
   and gives up silently as soon as a table cannot be built. */
static void
fixup_ulcase(void)
{
    PyObject *modules, *string_ns, *strop_ns, *table;

    /* find the string and strop modules */
    modules = PyImport_GetModuleDict();
    if (!modules)
        return;
    string_ns = PyDict_GetItemString(modules, "string");
    if (string_ns)
        string_ns = PyModule_GetDict(string_ns);
    strop_ns = PyDict_GetItemString(modules, "strop");
    if (strop_ns)
        strop_ns = PyModule_GetDict(strop_ns);
    if (!string_ns && !strop_ns)
        return;

    /* create uppercase map string */
    table = locale_ctype_members('u');
    if (!table)
        return;
    if (string_ns)
        PyDict_SetItemString(string_ns, "uppercase", table);
    if (strop_ns)
        PyDict_SetItemString(strop_ns, "uppercase", table);
    Py_DECREF(table);

    /* create lowercase string */
    table = locale_ctype_members('l');
    if (!table)
        return;
    if (string_ns)
        PyDict_SetItemString(string_ns, "lowercase", table);
    if (strop_ns)
        PyDict_SetItemString(strop_ns, "lowercase", table);
    Py_DECREF(table);

    /* create letters string */
    table = locale_ctype_members('a');
    if (!table)
        return;
    if (string_ns)
        PyDict_SetItemString(string_ns, "letters", table);
    Py_DECREF(table);
}
/* setlocale(category[, locale]) -> string.

   With a locale string: activate it, refresh the case tables when LC_CTYPE
   or LC_ALL changed, and return the resulting locale name.  Without one (or
   with None): only query the current setting.  Failures of the C library
   call raise the module's Error exception. */
static PyObject*
PyLocale_setlocale(PyObject* self, PyObject* args)
{
    int category;
    char *locale = NULL, *current;
    PyObject *answer;

    if (!PyArg_ParseTuple(args, "i|z:setlocale", &category, &locale))
        return NULL;

#if defined(MS_WINDOWS)
    if (category < LC_MIN || category > LC_MAX)
    {
        PyErr_SetString(Error, "invalid locale category");
        return NULL;
    }
#endif

    if (locale == NULL) {
        /* get locale */
        current = setlocale(category, NULL);
        if (current == NULL) {
            PyErr_SetString(Error, "locale query failed");
            return NULL;
        }
        return PyString_FromString(current);
    }

    /* set locale */
    current = setlocale(category, locale);
    if (current == NULL) {
        /* operation failed, no setting was changed */
        PyErr_SetString(Error, "unsupported locale setting");
        return NULL;
    }

    answer = PyString_FromString(current);
    if (answer == NULL)
        return NULL;

    /* record changes to LC_CTYPE */
    if (category == LC_CTYPE || category == LC_ALL)
        fixup_ulcase();

    /* things that got wrong up to here are ignored */
    PyErr_Clear();
    return answer;
}
PyDoc_STRVAR(localeconv__doc__,
"() -> dict. Returns numeric and monetary locale-specific parameters.");

/* localeconv(): copy the fields of the C library's struct lconv into a new
   dict keyed by field name.  The two grouping fields become lists (see
   copy_grouping).  Returns a new reference, or NULL with an exception set
   if any value cannot be created or stored. */
static PyObject*
PyLocale_localeconv(PyObject* self)
{
    PyObject* result;
    struct lconv *l;
    PyObject *x;

    result = PyDict_New();
    if (!result)
        return NULL;

    /* if LC_NUMERIC is different in the C library, use saved value */
    l = localeconv();

    /* hopefully, the localeconv result survives the C library calls
       involved herein */

    /* Both helpers store one lconv field under its own name.  They jump to
       `failed` when the value cannot be created *or* cannot be inserted; in
       the latter case `x` is still owned here and is released at the
       label. */
#define RESULT_STRING(s)\
    x = PyString_FromString(l->s);\
    if (!x) goto failed;\
    if (PyDict_SetItemString(result, #s, x) < 0) goto failed;\
    Py_DECREF(x)

#define RESULT_INT(i)\
    x = PyInt_FromLong(l->i);\
    if (!x) goto failed;\
    if (PyDict_SetItemString(result, #i, x) < 0) goto failed;\
    Py_DECREF(x)

    /* Numeric information */
    RESULT_STRING(decimal_point);
    RESULT_STRING(thousands_sep);
    x = copy_grouping(l->grouping);
    if (!x)
        goto failed;
    if (PyDict_SetItemString(result, "grouping", x) < 0)
        goto failed;
    Py_DECREF(x);

    /* Monetary information */
    RESULT_STRING(int_curr_symbol);
    RESULT_STRING(currency_symbol);
    RESULT_STRING(mon_decimal_point);
    RESULT_STRING(mon_thousands_sep);
    x = copy_grouping(l->mon_grouping);
    if (!x)
        goto failed;
    if (PyDict_SetItemString(result, "mon_grouping", x) < 0)
        goto failed;
    Py_DECREF(x);
    RESULT_STRING(positive_sign);
    RESULT_STRING(negative_sign);
    RESULT_INT(int_frac_digits);
    RESULT_INT(frac_digits);
    RESULT_INT(p_cs_precedes);
    RESULT_INT(p_sep_by_space);
    RESULT_INT(n_cs_precedes);
    RESULT_INT(n_sep_by_space);
    RESULT_INT(p_sign_posn);
    RESULT_INT(n_sign_posn);
    return result;

  failed:
    /* `x` is either NULL (creation failed) or a value we still own
       (insertion failed); it is never a reference that was already
       released. */
    Py_XDECREF(result);
    Py_XDECREF(x);
    return NULL;
}
PyDoc_STRVAR(strcoll__doc__,
"string,string -> int. Compares two strings according to the locale.");

/* Locale-aware comparison of two strings.
 *
 * Without wcscoll()/unicode support the arguments are parsed as C strings
 * and compared with strcoll().  Otherwise two byte strings are compared with
 * strcoll(), and if at least one argument is unicode both are converted to
 * wchar_t arrays and compared with wcscoll().  Returns a Python int, or NULL
 * with an exception set.
 */
static PyObject*
PyLocale_strcoll(PyObject* self, PyObject* args)
{
#if !defined(HAVE_WCSCOLL) || !defined(Py_USING_UNICODE)
    char *s1,*s2;

    if (!PyArg_ParseTuple(args, "ss:strcoll", &s1, &s2))
        return NULL;
    return PyInt_FromLong(strcoll(s1, s2));
#else
    PyObject *os1, *os2, *result = NULL;
    wchar_t *ws1 = NULL, *ws2 = NULL;
    /* rel1/rel2 record whether os1/os2 were replaced by new references
       that this function has to release. */
    int rel1 = 0, rel2 = 0, len1, len2;

    if (!PyArg_UnpackTuple(args, "strcoll", 2, 2, &os1, &os2))
        return NULL;
    /* If both arguments are byte strings, use strcoll.  */
    if (PyString_Check(os1) && PyString_Check(os2))
        return PyInt_FromLong(strcoll(PyString_AS_STRING(os1),
                                      PyString_AS_STRING(os2)));
    /* If neither argument is unicode, it's an error.  Return right away:
       carrying on with the exception pending would run the conversions
       below with an error already set. */
    if (!PyUnicode_Check(os1) && !PyUnicode_Check(os2)) {
        PyErr_SetString(PyExc_ValueError, "strcoll arguments must be strings");
        return NULL;
    }
    /* Convert the non-unicode argument to unicode. */
    if (!PyUnicode_Check(os1)) {
        os1 = PyUnicode_FromObject(os1);
        if (!os1)
            return NULL;
        rel1 = 1;
    }
    if (!PyUnicode_Check(os2)) {
        os2 = PyUnicode_FromObject(os2);
        if (!os2) {
            if (rel1) {
                Py_DECREF(os1);
            }
            return NULL;
        }
        rel2 = 1;
    }
    /* Convert the unicode strings to wchar[]. */
    len1 = PyUnicode_GET_SIZE(os1) + 1;
    ws1 = PyMem_NEW(wchar_t, len1);
    if (!ws1) {
        PyErr_NoMemory();
        goto done;
    }
    if (PyUnicode_AsWideChar((PyUnicodeObject*)os1, ws1, len1) == -1)
        goto done;
    ws1[len1 - 1] = 0;
    len2 = PyUnicode_GET_SIZE(os2) + 1;
    ws2 = PyMem_NEW(wchar_t, len2);
    if (!ws2) {
        PyErr_NoMemory();
        goto done;
    }
    if (PyUnicode_AsWideChar((PyUnicodeObject*)os2, ws2, len2) == -1)
        goto done;
    ws2[len2 - 1] = 0;
    /* Collate the strings. */
    result = PyInt_FromLong(wcscoll(ws1, ws2));
  done:
    /* Deallocate everything. */
    if (ws1) PyMem_FREE(ws1);
    if (ws2) PyMem_FREE(ws2);
    if (rel1) {
        Py_DECREF(os1);
    }
    if (rel2) {
        Py_DECREF(os2);
    }
    return result;
#endif
}
PyDoc_STRVAR(strxfrm__doc__,
"string -> string. Returns a string that behaves for cmp locale-aware.");

/* Return the strxfrm() transformation of the argument as a Python string.
 *
 * A buffer the size of the input is tried first; if strxfrm() reports that
 * more room is needed the buffer is grown once and the call repeated.
 * Returns NULL with MemoryError set when a buffer cannot be allocated.
 */
static PyObject*
PyLocale_strxfrm(PyObject* self, PyObject* args)
{
    char *s, *buf;
    size_t n1, n2;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "s:strxfrm", &s))
        return NULL;

    /* assume no change in size, first */
    n1 = strlen(s) + 1;
    buf = PyMem_Malloc(n1);
    if (!buf)
        return PyErr_NoMemory();
    n2 = strxfrm(buf, s, n1) + 1;
    if (n2 > n1) {
        /* more space needed; keep the old pointer until the resize has
           succeeded so it can still be released on failure */
        char *bigger = PyMem_Realloc(buf, n2);
        if (!bigger) {
            PyMem_Free(buf);
            return PyErr_NoMemory();
        }
        buf = bigger;
        strxfrm(buf, s, n2);
    }
    result = PyString_FromString(buf);
    PyMem_Free(buf);
    return result;
}
#if defined(MS_WINDOWS)
/* Windows only: return a (locale, encoding) tuple for the user default
 * locale.
 *
 * The encoding is "cp" followed by the value of GetACP().  The locale is
 * "<language>_<country>" built from the ISO 639 / ISO 3166 names when
 * GetLocaleInfo() provides them, otherwise "0x" followed by the
 * LOCALE_IDEFAULTLANGUAGE value, otherwise None.
 */
static PyObject*
PyLocale_getdefaultlocale(PyObject* self)
{
    char encoding[100];
    char locale[100];

    PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());

    if (GetLocaleInfo(LOCALE_USER_DEFAULT,
                      LOCALE_SISO639LANGNAME,
                      locale, sizeof(locale))) {
        Py_ssize_t i = strlen(locale);
        locale[i++] = '_';
        if (GetLocaleInfo(LOCALE_USER_DEFAULT,
                          LOCALE_SISO3166CTRYNAME,
                          locale+i, (int)(sizeof(locale)-i)))
            return Py_BuildValue("ss", locale, encoding);
    }

    /* If we end up here, this windows version didn't know about
       ISO639/ISO3166 names (it's probably Windows 95).  Return the
       Windows language identifier instead (a hexadecimal number) */

    locale[0] = '0';
    locale[1] = 'x';
    if (GetLocaleInfo(LOCALE_USER_DEFAULT, LOCALE_IDEFAULTLANGUAGE,
                      locale+2, sizeof(locale)-2)) {
        return Py_BuildValue("ss", locale, encoding);
    }

    /* cannot determine the language code (very unlikely).  The "O" format
       takes its own reference to Py_None, so no extra Py_INCREF is needed
       (one would leak a reference on every call). */
    return Py_BuildValue("Os", Py_None, encoding);
}
#endif
#ifdef HAVE_LANGINFO_H
/* LANGINFO(X) expands to one table entry pairing the constant's spelling
   (as a string) with its integer value. */
#define LANGINFO(X) {#X, X}
/* Table of the nl_langinfo() item constants known to this module.  The
   table is terminated by an entry whose name is 0; code in this file walks
   it until that sentinel. */
static struct langinfo_constant{
char* name;
int value;
} langinfo_constants[] =
{
/* These constants should exist on any langinfo implementation */
LANGINFO(DAY_1),
LANGINFO(DAY_2),
LANGINFO(DAY_3),
LANGINFO(DAY_4),
LANGINFO(DAY_5),
LANGINFO(DAY_6),
LANGINFO(DAY_7),
LANGINFO(ABDAY_1),
LANGINFO(ABDAY_2),
LANGINFO(ABDAY_3),
LANGINFO(ABDAY_4),
LANGINFO(ABDAY_5),
LANGINFO(ABDAY_6),
LANGINFO(ABDAY_7),
LANGINFO(MON_1),
LANGINFO(MON_2),
LANGINFO(MON_3),
LANGINFO(MON_4),
LANGINFO(MON_5),
LANGINFO(MON_6),
LANGINFO(MON_7),
LANGINFO(MON_8),
LANGINFO(MON_9),
LANGINFO(MON_10),
LANGINFO(MON_11),
LANGINFO(MON_12),
LANGINFO(ABMON_1),
LANGINFO(ABMON_2),
LANGINFO(ABMON_3),
LANGINFO(ABMON_4),
LANGINFO(ABMON_5),
LANGINFO(ABMON_6),
LANGINFO(ABMON_7),
LANGINFO(ABMON_8),
LANGINFO(ABMON_9),
LANGINFO(ABMON_10),
LANGINFO(ABMON_11),
LANGINFO(ABMON_12),
#ifdef RADIXCHAR
/* The following are not available with glibc 2.0 */
LANGINFO(RADIXCHAR),
LANGINFO(THOUSEP),
/* YESSTR and NOSTR are deprecated in glibc, since they are
a special case of message translation, which should be rather
done using gettext. So we don't expose it to Python in the
first place.
LANGINFO(YESSTR),
LANGINFO(NOSTR),
*/
LANGINFO(CRNCYSTR),
#endif
LANGINFO(D_T_FMT),
LANGINFO(D_FMT),
LANGINFO(T_FMT),
LANGINFO(AM_STR),
LANGINFO(PM_STR),
/* The following constants are available only with XPG4, but...
AIX 3.2. only has CODESET.
OpenBSD doesn't have CODESET but has T_FMT_AMPM, and doesn't have
a few of the others.
Solution: ifdef-test them all. */
#ifdef CODESET
LANGINFO(CODESET),
#endif
#ifdef T_FMT_AMPM
LANGINFO(T_FMT_AMPM),
#endif
#ifdef ERA
LANGINFO(ERA),
#endif
#ifdef ERA_D_FMT
LANGINFO(ERA_D_FMT),
#endif
#ifdef ERA_D_T_FMT
LANGINFO(ERA_D_T_FMT),
#endif
#ifdef ERA_T_FMT
LANGINFO(ERA_T_FMT),
#endif
#ifdef ALT_DIGITS
LANGINFO(ALT_DIGITS),
#endif
#ifdef YESEXPR
LANGINFO(YESEXPR),
#endif
#ifdef NOEXPR
LANGINFO(NOEXPR),
#endif
#ifdef _DATE_FMT
/* This is not available in all glibc versions that have CODESET. */
LANGINFO(_DATE_FMT),
#endif
/* Sentinel: a null name marks the end of the table. */
{0, 0}
};
PyDoc_STRVAR(nl_langinfo__doc__,
"nl_langinfo(key) -> string\n"
"Return the value for the locale information associated with key.");

/* Look `key` up in langinfo_constants and, if it is listed there, return
 * the C library's nl_langinfo() text for it as a Python string.  Keys that
 * are not in the table raise ValueError.
 */
static PyObject*
PyLocale_nl_langinfo(PyObject* self, PyObject* args)
{
    int item;
    const struct langinfo_constant *entry;

    if (!PyArg_ParseTuple(args, "i:nl_langinfo", &item))
        return NULL;

    /* Only table-listed constants are passed on.  GNU libc sometimes
       returns numeric values in the char* return value, which would
       crash PyString_FromString. */
    for (entry = langinfo_constants; entry->name; entry++) {
        const char *text;

        if (entry->value != item)
            continue;
        /* GNU libc returns NULL instead of an empty string for
           nl_langinfo(ERA); map that to "". */
        text = nl_langinfo(item);
        if (text == NULL)
            text = "";
        return PyString_FromString(text);
    }

    PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant");
    return NULL;
}
#endif /* HAVE_LANGINFO_H */
#ifdef HAVE_LIBINTL_H
PyDoc_STRVAR(gettext__doc__,
"gettext(msg) -> string\n"
"Return translation of msg.");

/* Pass the message to the C library's gettext() and wrap the result in a
   Python string. */
static PyObject*
PyIntl_gettext(PyObject* self, PyObject *args)
{
    char *msgid;
    char *translated;

    if (!PyArg_ParseTuple(args, "s", &msgid))
        return NULL;
    translated = gettext(msgid);
    return PyString_FromString(translated);
}
PyDoc_STRVAR(dgettext__doc__,
"dgettext(domain, msg) -> string\n"
"Return translation of msg in domain.");

/* Pass domain (which may be None, parsed as NULL) and message to the C
   library's dgettext() and wrap the result in a Python string. */
static PyObject*
PyIntl_dgettext(PyObject* self, PyObject *args)
{
    char *domain;
    char *msgid;
    char *translated;

    if (!PyArg_ParseTuple(args, "zs", &domain, &msgid))
        return NULL;
    translated = dgettext(domain, msgid);
    return PyString_FromString(translated);
}
PyDoc_STRVAR(dcgettext__doc__,
"dcgettext(domain, msg, category) -> string\n"
"Return translation of msg in domain and category.");

/* Pass domain (None is parsed as NULL), message and category to the C
   library's dcgettext() and wrap the result in a Python string. */
static PyObject*
PyIntl_dcgettext(PyObject *self, PyObject *args)
{
    char *domain;
    char *msgid;
    char *translated;
    int category;

    if (!PyArg_ParseTuple(args, "zsi", &domain, &msgid, &category))
        return NULL;
    translated = dcgettext(domain, msgid, category);
    return PyString_FromString(translated);
}
PyDoc_STRVAR(textdomain__doc__,
"textdomain(domain) -> string\n"
"Set the C library's textdomain to domain, returning the new domain.");

/* Call the C library's textdomain() with the given domain (None is parsed
 * as NULL) and return the domain string it reports.  A NULL result from the
 * C call is turned into OSError built from errno.
 */
static PyObject*
PyIntl_textdomain(PyObject* self, PyObject* args)
{
    char *domain;

    if (!PyArg_ParseTuple(args, "z", &domain))
        return NULL;
    domain = textdomain(domain);
    if (!domain) {
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }
    return PyString_FromString(domain);
}
PyDoc_STRVAR(bindtextdomain__doc__,
"bindtextdomain(domain, dir) -> string\n"
"Bind the C library's domain to dir.");

/* Call the C library's bindtextdomain() for a non-empty domain and return
 * the directory string it reports.  The directory argument may be None
 * (parsed as NULL).  An empty domain raises the module's Error; a NULL
 * result from the C call raises OSError built from errno.
 */
static PyObject*
PyIntl_bindtextdomain(PyObject* self,PyObject*args)
{
    char *domain;
    char *dirname;
    char *bound;

    if (!PyArg_ParseTuple(args, "sz", &domain, &dirname))
        return NULL;
    if (domain[0] == '\0') {
        PyErr_SetString(Error, "domain must be a non-empty string");
        return NULL;
    }
    bound = bindtextdomain(domain, dirname);
    if (bound == NULL) {
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }
    return PyString_FromString(bound);
}
#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
PyDoc_STRVAR(bind_textdomain_codeset__doc__,
"bind_textdomain_codeset(domain, codeset) -> string\n"
"Bind the C library's domain to codeset.");

/* Call the C library's bind_textdomain_codeset() and return the codeset
 * string it reports, or None when the C call returns NULL.  The codeset
 * argument may be None (parsed as NULL).
 */
static PyObject*
PyIntl_bind_textdomain_codeset(PyObject* self,PyObject*args)
{
    char *domain;
    char *codeset;
    char *bound;

    if (!PyArg_ParseTuple(args, "sz", &domain, &codeset))
        return NULL;
    bound = bind_textdomain_codeset(domain, codeset);
    if (bound == NULL) {
        Py_RETURN_NONE;
    }
    return PyString_FromString(bound);
}
#endif
#endif
/* Method table passed to Py_InitModule() for the _locale module.  Entries
   guarded by MS_WINDOWS, HAVE_LANGINFO_H, HAVE_LIBINTL_H and
   HAVE_BIND_TEXTDOMAIN_CODESET are only present when the corresponding
   implementation above is compiled.  The table ends with a NULL entry. */
static struct PyMethodDef PyLocale_Methods[] = {
{"setlocale", (PyCFunction) PyLocale_setlocale,
METH_VARARGS, setlocale__doc__},
{"localeconv", (PyCFunction) PyLocale_localeconv,
METH_NOARGS, localeconv__doc__},
{"strcoll", (PyCFunction) PyLocale_strcoll,
METH_VARARGS, strcoll__doc__},
{"strxfrm", (PyCFunction) PyLocale_strxfrm,
METH_VARARGS, strxfrm__doc__},
#if defined(MS_WINDOWS)
/* No docstring is supplied for this entry. */
{"_getdefaultlocale", (PyCFunction) PyLocale_getdefaultlocale, METH_NOARGS},
#endif
#ifdef HAVE_LANGINFO_H
{"nl_langinfo", (PyCFunction) PyLocale_nl_langinfo,
METH_VARARGS, nl_langinfo__doc__},
#endif
#ifdef HAVE_LIBINTL_H
{"gettext",(PyCFunction)PyIntl_gettext,METH_VARARGS,
gettext__doc__},
{"dgettext",(PyCFunction)PyIntl_dgettext,METH_VARARGS,
dgettext__doc__},
{"dcgettext",(PyCFunction)PyIntl_dcgettext,METH_VARARGS,
dcgettext__doc__},
{"textdomain",(PyCFunction)PyIntl_textdomain,METH_VARARGS,
textdomain__doc__},
{"bindtextdomain",(PyCFunction)PyIntl_bindtextdomain,METH_VARARGS,
bindtextdomain__doc__},
#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
{"bind_textdomain_codeset",(PyCFunction)PyIntl_bind_textdomain_codeset,
METH_VARARGS, bind_textdomain_codeset__doc__},
#endif
#endif
/* sentinel */
{NULL, NULL}
};
PyMODINIT_FUNC
init_locale(void)
{
PyObject *m, *d, *x;
#ifdef HAVE_LANGINFO_H
int i;
#endif
m = Py_InitModule("_locale", PyLocale_Methods);
if (m == NULL)
return;
d = PyModule_GetDict(m);
x = PyInt_FromLong(LC_CTYPE);
PyDict_SetItemString(d, "LC_CTYPE", x);
Py_XDECREF(x);
x = PyInt_FromLong(LC_TIME);
PyDict_SetItemString(d, "LC_TIME", x);
Py_XDECREF(x);
x = PyInt_FromLong(LC_COLLATE);
PyDict_SetItemString(d, "LC_COLLATE", x);
Py_XDECREF(x);
x = PyInt_FromLong(LC_MONETARY);
PyDict_SetItemString(d, "LC_MONETARY", x);
Py_XDECREF(x);
#ifdef LC_MESSAGES
x = PyInt_FromLong(LC_MESSAGES);
PyDict_SetItemString(d, "LC_MESSAGES", x);
Py_XDECREF(x);
#endif /* LC_MESSAGES */
x = PyInt_FromLong(LC_NUMERIC);
PyDict_SetItemString(d, "LC_NUMERIC", x);
Py_XDECREF(x);
x = PyInt_FromLong(LC_ALL);
PyDict_SetItemString(d, "LC_ALL", x);
Py_XDECREF(x);
x = PyInt_FromLong(CHAR_MAX);
PyDict_SetItemString(d, "CHAR_MAX", x);
Py_XDECREF(x);
Error = PyErr_NewException("locale.Error", NULL, NULL);
PyDict_SetItemString(d, "Error", Error);
x = PyString_FromString(locale__doc__);
PyDict_SetItemString(d, "__doc__", x);
Py_XDECREF(x);
#ifdef HAVE_LANGINFO_H
for (i = 0; langinfo_constants[i].name; i++) {
PyModule_AddIntConstant(m, langinfo_constants[i].name,
langinfo_constants[i].value);
}
#endif
}
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

View File

@ -0,0 +1,255 @@
/* Definitions of some C99 math library functions, for those platforms
that don't implement these functions already. */
#include "Python.h"
#include <float.h>
#include "_math.h"
/* The following copyright notice applies to the original
implementations of acosh, asinh and atanh. */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Constants shared by the substitute functions in this file. */
/* Added to log(x) by _Py_acosh and _Py_asinh for large arguments, where the
   result is computed as log(2x). */
static const double ln2 = 6.93147180559945286227E-01;
static const double two_pow_m28 = 3.7252902984619141E-09; /* 2**-28 */
static const double two_pow_p28 = 268435456.0; /* 2**28 */
/* Divisor used by _Py_atanh's x/zero fallback when Py_NAN is not defined. */
static const double zero = 0.0;
/* acosh(x)
 * Method :
 *      Based on
 *            acosh(x) = log [ x + sqrt(x*x-1) ]
 *      we have
 *            acosh(x) := log(x)+ln2, if x is large; else
 *            acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else
 *            acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1.
 *
 * Special cases:
 *      acosh(NaN) returns x+x.
 *      acosh(x) for x < 1 sets errno to EDOM and returns a NaN.
 *      acosh(+inf) returns x+x.
 */

double
_Py_acosh(double x)
{
    double t;

    if (Py_IS_NAN(x))
        return x+x;

    if (x < 1.) {
        /* outside the domain */
        errno = EDOM;
#ifdef Py_NAN
        return Py_NAN;
#else
        return (x-x)/(x-x);
#endif
    }

    if (x >= two_pow_p28) {
        /* x >= 2**28 */
        if (Py_IS_INFINITY(x))
            return x+x;
        return log(x)+ln2;              /* acosh(huge)=log(2x) */
    }

    if (x == 1.)
        return 0.0;                     /* acosh(1) = 0 */

    if (x > 2.) {
        /* 2 < x < 2**28 */
        t = x*x;
        return log(2.0*x - 1.0 / (x + sqrt(t - 1.0)));
    }

    /* 1 < x <= 2 */
    t = x - 1.0;
    return m_log1p(t + sqrt(2.0*t + t*t));
}
/* asinh(x)
 * Method :
 *      Based on
 *              asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ]
 *      we have
 *      asinh(x) := x  if  1+x*x=1,
 *               := sign(x)*(log(x)+ln2)) for large |x|, else
 *               := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else
 *               := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2)))
 *
 * NaN and infinite arguments return x+x.
 */

double
_Py_asinh(double x)
{
    double absx = fabs(x);
    double t;

    if (Py_IS_NAN(x) || Py_IS_INFINITY(x))
        return x+x;

    /* |x| < 2**-28: return x inexact except 0 */
    if (absx < two_pow_m28)
        return x;

    /* |x| > 2**28 */
    if (absx > two_pow_p28)
        return copysign(log(absx)+ln2, x);

    /* 2 < |x| <= 2**28 */
    if (absx > 2.0)
        return copysign(log(2.0*absx + 1.0 / (sqrt(x*x + 1.0) + absx)), x);

    /* 2**-28 <= |x| <= 2 */
    t = x*x;
    return copysign(m_log1p(absx + t / (1.0 + sqrt(1.0 + t))), x);
}
/* atanh(x)
 * Method :
 *      1.Reduced x to positive by atanh(-x) = -atanh(x)
 *      2.For x>=0.5
 *                1              2x                          x
 *      atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * -------)
 *                2             1 - x                      1 - x
 *
 *      For x<0.5
 *      atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
 *
 * Special cases:
 *      atanh(NaN) returns x+x.
 *      atanh(x) for |x| >= 1 sets errno to EDOM and returns Py_NAN (or
 *      x/zero when Py_NAN is not defined).
 */

double
_Py_atanh(double x)
{
    double absx;
    double twice;

    if (Py_IS_NAN(x))
        return x+x;

    absx = fabs(x);

    if (absx >= 1.) {
        /* |x| >= 1: outside the domain */
        errno = EDOM;
#ifdef Py_NAN
        return Py_NAN;
#else
        return x/zero;
#endif
    }

    /* |x| < 2**-28 */
    if (absx < two_pow_m28)
        return x;

    if (absx < 0.5) {
        /* 2**-28 <= |x| < 0.5 */
        twice = absx+absx;
        return copysign(0.5 * m_log1p(twice + twice*absx / (1.0 - absx)), x);
    }

    /* 0.5 <= |x| < 1.0 */
    return copysign(0.5 * m_log1p((absx + absx) / (1.0 - absx)), x);
}
/* Mathematically, expm1(x) = exp(x) - 1. The expm1 function is designed
to avoid the significant loss of precision that arises from direct
evaluation of the expression exp(x) - 1, for x near 0. */
double
_Py_expm1(double x)
{
/* For abs(x) >= log(2), it's safe to evaluate exp(x) - 1 directly; this
also works fine for infinities and nans.
For smaller x, we can use a method due to Kahan that achieves close to
full accuracy.
*/
if (fabs(x) < 0.7) {
double u;
u = exp(x);
if (u == 1.0)
return x;
else
return (u - 1.0) * x / log(u);
}
else
return exp(x) - 1.0;
}
/* log1p(x) = log(1+x). The log1p function is designed to avoid the
significant loss of precision that arises from direct evaluation when x is
small. */
#ifdef HAVE_LOG1P
double
_Py_log1p(double x)
{
/* Some platforms supply a log1p function but don't respect the sign of
zero: log1p(-0.0) gives 0.0 instead of the correct result of -0.0.
To save fiddling with configure tests and platform checks, we handle the
special case of zero input directly on all platforms.
*/
if (x == 0.0) {
return x;
}
else {
return log1p(x);
}
}
#else
double
_Py_log1p(double x)
{
/* For x small, we use the following approach. Let y be the nearest float
to 1+x, then
1+x = y * (1 - (y-1-x)/y)
so log(1+x) = log(y) + log(1-(y-1-x)/y). Since (y-1-x)/y is tiny, the
second term is well approximated by (y-1-x)/y. If abs(x) >=
DBL_EPSILON/2 or the rounding-mode is some form of round-to-nearest
then y-1-x will be exactly representable, and is computed exactly by
(y-1)-x.
If abs(x) < DBL_EPSILON/2 and the rounding mode is not known to be
round-to-nearest then this method is slightly dangerous: 1+x could be
rounded up to 1+DBL_EPSILON instead of down to 1, and in that case
y-1-x will not be exactly representable any more and the result can be
off by many ulps. But this is easily fixed: for a floating-point
number |x| < DBL_EPSILON/2., the closest floating-point number to
log(1+x) is exactly x.
*/
double y;
if (fabs(x) < DBL_EPSILON/2.) {
return x;
}
else if (-0.5 <= x && x <= 1.) {
/* WARNING: it's possible than an overeager compiler
will incorrectly optimize the following two lines
to the equivalent of "return log(1.+x)". If this
happens, then results from log1p will be inaccurate
for small x. */
y = 1.+x;
return log(y)-((y-1.)-x)/y;
}
else {
/* NaNs and infinities should end up here */
return log(1.+x);
}
}
#endif /* ifdef HAVE_LOG1P */

View File

@ -0,0 +1,41 @@
/* Prototypes for the substitute math functions, followed by the m_* macros
   through which callers reach either the platform function or the
   substitute. */
double _Py_acosh(double x);
double _Py_asinh(double x);
double _Py_atanh(double x);
double _Py_expm1(double x);
double _Py_log1p(double x);
/* m_acosh: the platform's acosh when HAVE_ACOSH is defined. */
#ifdef HAVE_ACOSH
#define m_acosh acosh
#else
/* if the system doesn't have acosh, use the substitute
function defined in Modules/_math.c. */
#define m_acosh _Py_acosh
#endif
/* m_asinh: the platform's asinh when HAVE_ASINH is defined. */
#ifdef HAVE_ASINH
#define m_asinh asinh
#else
/* if the system doesn't have asinh, use the substitute
function defined in Modules/_math.c. */
#define m_asinh _Py_asinh
#endif
/* m_atanh: the platform's atanh when HAVE_ATANH is defined. */
#ifdef HAVE_ATANH
#define m_atanh atanh
#else
/* if the system doesn't have atanh, use the substitute
function defined in Modules/_math.c. */
#define m_atanh _Py_atanh
#endif
/* m_expm1: the platform's expm1 when HAVE_EXPM1 is defined. */
#ifdef HAVE_EXPM1
#define m_expm1 expm1
#else
/* if the system doesn't have expm1, use the substitute
function defined in Modules/_math.c. */
#define m_expm1 _Py_expm1
#endif
/* Use the substitute from _math.c on all platforms:
it includes workarounds for buggy handling of zeros. */
#define m_log1p _Py_log1p

View File

@ -0,0 +1,595 @@
/* Random objects */
/* ------------------------------------------------------------------
The code in this module was based on a download from:
http://www.math.keio.ac.jp/~matumoto/MT2002/emt19937ar.html
It was modified in 2002 by Raymond Hettinger as follows:
* the principal computational lines untouched.
* renamed genrand_res53() to random_random() and wrapped
in python calling/return code.
* genrand_int32() and the helper functions, init_genrand()
and init_by_array(), were declared static, wrapped in
Python calling/return code. also, their global data
references were replaced with structure references.
* unused functions from the original were deleted.
new, original C python code was added to implement the
Random() interface.
The following are the verbatim comments from the original code:
A C-program for MT19937, with initialization improved 2002/1/26.
Coded by Takuji Nishimura and Makoto Matsumoto.
Before using, initialize the state by using init_genrand(seed)
or init_by_array(init_key, key_length).
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The names of its contributors may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Any feedback is very welcome.
http://www.math.keio.ac.jp/matumoto/emt.html
email: matumoto@math.keio.ac.jp
*/
/* ---------------------------------------------------------------*/
#include "Python.h"
#include <time.h> /* for seeding to current time */
/* Period parameters -- These are all magic. Don't change. */
/* N is the number of words in the generator state; M is the offset used
   when the state is regenerated in genrand_int32(). */
#define N 624
#define M 397
#define MATRIX_A 0x9908b0dfUL /* constant vector a */
#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
#define LOWER_MASK 0x7fffffffUL /* least significant r bits */
/* Per-instance generator: the N state words plus the position of the next
   word to hand out.  An index >= N makes genrand_int32() regenerate the
   whole state first. */
typedef struct {
PyObject_HEAD
unsigned long state[N];
int index;
} RandomObject;
/* Forward declaration; the type object is defined later in the file. */
static PyTypeObject Random_Type;
/* Exact type test: true only when v's type is Random_Type itself. */
#define RandomObject_Check(v) (Py_TYPE(v) == &Random_Type)
/* Random methods */
/* generates a random number on [0,0xffffffff]-interval */
/* When self->index has reached N, all N state words are recomputed in place
   and the index is reset to 0.  The word at self->index is then read, the
   index advanced, and the word passed through the shift/mask steps below
   before being returned. */
static unsigned long
genrand_int32(RandomObject *self)
{
unsigned long y;
static unsigned long mag01[2]={0x0UL, MATRIX_A};
/* mag01[x] = x * MATRIX_A for x=0,1 */
unsigned long *mt;
mt = self->state;
if (self->index >= N) { /* generate N words at one time */
int kk;
/* words 0 .. N-M-1 combine with the word M positions ahead */
for (kk=0;kk<N-M;kk++) {
y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1UL];
}
/* words N-M .. N-2 combine with the word N-M positions behind */
for (;kk<N-1;kk++) {
y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
}
/* the last word wraps around to mt[0] */
y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
self->index = 0;
}
y = mt[self->index++];
/* output transformation applied to the state word */
y ^= (y >> 11);
y ^= (y << 7) & 0x9d2c5680UL;
y ^= (y << 15) & 0xefc60000UL;
y ^= (y >> 18);
return y;
}
/* random_random is the function named genrand_res53 in the original code;
 * generates a random number on [0,1) with 53-bit resolution; note that
 * 9007199254740992 == 2**53; I assume they're spelling "/2**53" as
 * multiply-by-reciprocal in the (likely vain) hope that the compiler will
 * optimize the division away at compile-time.  67108864 is 2**26.  In
 * effect, a contains 27 random bits shifted left 26, and b fills in the
 * lower 26 bits of the 53-bit numerator.
 * The original code credited Isaku Wada for this algorithm, 2002/01/09.
 */
static PyObject *
random_random(RandomObject *self)
{
    unsigned long a;
    unsigned long b;

    /* Two consecutive draws: the first supplies the high part, the second
       the low part. */
    a = genrand_int32(self) >> 5;
    b = genrand_int32(self) >> 6;
    return PyFloat_FromDouble((a*67108864.0+b)*(1.0/9007199254740992.0));
}
/* initializes mt[N] with a seed */
/* The first state word is the low 32 bits of s; every further word is
   derived from its predecessor.  On return self->index equals N, so the
   next genrand_int32() call regenerates the state before producing
   output. */
static void
init_genrand(RandomObject *self, unsigned long s)
{
int mti;
unsigned long *mt;
mt = self->state;
mt[0]= s & 0xffffffffUL;
for (mti=1; mti<N; mti++) {
mt[mti] =
(1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
/* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
/* In the previous versions, MSBs of the seed affect */
/* only MSBs of the array mt[]. */
/* 2002/01/09 modified by Makoto Matsumoto */
mt[mti] &= 0xffffffffUL;
/* for >32 bit machines */
}
/* the loop leaves mti == N */
self->index = mti;
return;
}
/* initialize by an array with array-length */
/* init_key is the array for initializing keys */
/* key_length is its length */
/* The state is first seeded with init_genrand(self, 19650218UL) and then
   mixed with the key words in two passes.  Always returns a new reference
   to None. */
static PyObject *
init_by_array(RandomObject *self, unsigned long init_key[], unsigned long key_length)
{
unsigned int i, j, k; /* was signed in the original code. RDH 12/16/2002 */
unsigned long *mt;
mt = self->state;
init_genrand(self, 19650218UL);
i=1; j=0;
/* first pass runs max(N, key_length) times, cycling through the key */
k = (N>key_length ? N : key_length);
for (; k; k--) {
mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL))
+ init_key[j] + j; /* non linear */
mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
i++; j++;
if (i>=N) { mt[0] = mt[N-1]; i=1; }
if (j>=key_length) j=0;
}
/* second pass runs N-1 times and does not use the key */
for (k=N-1; k; k--) {
mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL))
- i; /* non linear */
mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
i++;
if (i>=N) { mt[0] = mt[N-1]; i=1; }
}
mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */
Py_INCREF(Py_None);
return Py_None;
}
/*
* The rest is Python-specific code, neither part of, nor derived from, the
* Twister download.
*/
/* seed([n]) implementation.
 *
 * With no argument or None the generator is seeded from the current time.
 * Otherwise the absolute value of an int/long argument, or the hash of any
 * other object reinterpreted as unsigned, is split into 32-bit chunks that
 * are passed to init_by_array().
 *
 * Returns a new reference to None, or NULL with an exception set.
 */
static PyObject *
random_seed(RandomObject *self, PyObject *args)
{
    PyObject *result = NULL;            /* guilty until proved innocent */
    PyObject *masklower = NULL;
    PyObject *thirtytwo = NULL;
    PyObject *n = NULL;
    unsigned long *key = NULL;
    unsigned long keymax;               /* # of allocated slots in key */
    unsigned long keyused;              /* # of used slots in key */
    int err;

    PyObject *arg = NULL;

    if (!PyArg_UnpackTuple(args, "seed", 0, 1, &arg))
        return NULL;

    if (arg == NULL || arg == Py_None) {
        time_t now;

        time(&now);
        init_genrand(self, (unsigned long)now);
        Py_INCREF(Py_None);
        return Py_None;
    }
    /* If the arg is an int or long, use its absolute value; else use
     * the absolute value of its hash code.
     */
    if (PyInt_Check(arg) || PyLong_Check(arg))
        n = PyNumber_Absolute(arg);
    else {
        long hash = PyObject_Hash(arg);
        if (hash == -1)
            goto Done;
        n = PyLong_FromUnsignedLong((unsigned long)hash);
    }
    if (n == NULL)
        goto Done;

    /* Now split n into 32-bit chunks, from the right.  Each piece is
     * stored into key, which has a capacity of keymax chunks, of which
     * keyused are filled.  Alas, the repeated shifting makes this a
     * quadratic-time algorithm; we'd really like to use
     * _PyLong_AsByteArray here, but then we'd have to break into the
     * long representation to figure out how big an array was needed
     * in advance.
     */
    keymax = 8;         /* arbitrary; grows later if needed */
    keyused = 0;
    key = (unsigned long *)PyMem_Malloc(keymax * sizeof(*key));
    if (key == NULL) {
        /* PyMem_Malloc does not set an exception itself */
        PyErr_NoMemory();
        goto Done;
    }

    masklower = PyLong_FromUnsignedLong(0xffffffffU);
    if (masklower == NULL)
        goto Done;
    thirtytwo = PyInt_FromLong(32L);
    if (thirtytwo == NULL)
        goto Done;
    while ((err=PyObject_IsTrue(n))) {
        PyObject *newn;
        PyObject *pychunk;
        unsigned long chunk;

        if (err == -1)
            goto Done;
        pychunk = PyNumber_And(n, masklower);
        if (pychunk == NULL)
            goto Done;
        chunk = PyLong_AsUnsignedLong(pychunk);
        Py_DECREF(pychunk);
        if (chunk == (unsigned long)-1 && PyErr_Occurred())
            goto Done;
        newn = PyNumber_Rshift(n, thirtytwo);
        if (newn == NULL)
            goto Done;
        Py_DECREF(n);
        n = newn;
        if (keyused >= keymax) {
            unsigned long bigger = keymax << 1;
            unsigned long *grown;

            /* Guard both the doubling and the byte-size multiplication
               against wrap-around. */
            if ((bigger >> 1) != keymax ||
                bigger > ((size_t)-1) / sizeof(*key)) {
                PyErr_NoMemory();
                goto Done;
            }
            /* Resize through a temporary: on failure `key` is still the
               valid old block and is released at Done. */
            grown = (unsigned long *)PyMem_Realloc(key,
                                                   bigger * sizeof(*key));
            if (grown == NULL) {
                PyErr_NoMemory();
                goto Done;
            }
            key = grown;
            keymax = bigger;
        }
        assert(keyused < keymax);
        key[keyused++] = chunk;
    }

    /* n was zero: use a single zero chunk */
    if (keyused == 0)
        key[keyused++] = 0UL;
    result = init_by_array(self, key, keyused);
Done:
    Py_XDECREF(masklower);
    Py_XDECREF(thirtytwo);
    Py_XDECREF(n);
    PyMem_Free(key);
    return result;
}
/* getstate() implementation: return a tuple of N+1 longs holding the N
 * state words followed by the current index.  Returns NULL with an
 * exception set if any object cannot be created.
 */
static PyObject *
random_getstate(RandomObject *self)
{
    PyObject *tuple;
    PyObject *item;
    int pos;

    tuple = PyTuple_New(N+1);
    if (tuple == NULL)
        return NULL;

    for (pos = 0; pos < N; pos++) {
        item = PyLong_FromUnsignedLong(self->state[pos]);
        if (item == NULL) {
            Py_DECREF(tuple);
            return NULL;
        }
        PyTuple_SET_ITEM(tuple, pos, item);
    }

    /* the final slot carries the index */
    item = PyLong_FromLong((long)(self->index));
    if (item == NULL) {
        Py_DECREF(tuple);
        return NULL;
    }
    PyTuple_SET_ITEM(tuple, N, item);
    return tuple;
}
/* setstate(state) implementation.
 *
 * `state` must be a tuple of N+1 integers: N state words followed by the
 * index.  Each state word is masked to 32 bits.  The index must lie in
 * 0..N inclusive; anything else raises ValueError, because genrand_int32()
 * uses the index directly as an array subscript.
 *
 * The new state is assembled in a local buffer and copied into the object
 * only after every element has been validated, so a failed call leaves the
 * generator unchanged.
 *
 * Returns a new reference to None, or NULL with an exception set.
 */
static PyObject *
random_setstate(RandomObject *self, PyObject *state)
{
    int i;
    unsigned long element;
    unsigned long new_state[N];
    long index;

    if (!PyTuple_Check(state)) {
        PyErr_SetString(PyExc_TypeError,
            "state vector must be a tuple");
        return NULL;
    }
    if (PyTuple_Size(state) != N+1) {
        PyErr_SetString(PyExc_ValueError,
            "state vector is the wrong size");
        return NULL;
    }

    for (i=0; i<N ; i++) {
        element = PyLong_AsUnsignedLong(PyTuple_GET_ITEM(state, i));
        if (element == (unsigned long)-1 && PyErr_Occurred())
            return NULL;
        new_state[i] = element & 0xffffffffUL; /* Make sure we get sane state */
    }

    index = PyLong_AsLong(PyTuple_GET_ITEM(state, i));
    if (index == -1 && PyErr_Occurred())
        return NULL;
    if (index < 0 || index > N) {
        PyErr_SetString(PyExc_ValueError, "invalid state");
        return NULL;
    }

    /* everything validated: commit */
    for (i=0; i<N ; i++)
        self->state[i] = new_state[i];
    self->index = (int)index;

    Py_INCREF(Py_None);
    return Py_None;
}
/*
Jumpahead should be a fast way to advance the generator n-steps ahead, but
lacking a formula for that, the next best is to use n and the existing
state to create a new state far away from the original.

The generator uses constant spaced additive feedback, so shuffling the
state elements ought to produce a state which would not be encountered
(in the near term) by calls to random().  Shuffling is normally
implemented by swapping the ith element with another element ranging
from 0 to i inclusive.  That allows the element to have the possibility
of not being moved.  Since the goal is to produce a new, different
state, the swap element is ranged from 0 to i-1 inclusive.  This assures
that each element gets moved at least once.

To make sure that consecutive calls to jumpahead(n) produce different
states (even in the rare case of involutory shuffles), i+1 is added to
each element at position i.  Successive calls are then guaranteed to
have changing (growing) values as well as shuffled positions.

Finally, the self->index value is set to N so that the generator itself
kicks in on the next call to random().  This assures that all results
have been through the generator and do not just reflect alterations to
the underlying state.

The swap position is obtained with PyNumber_Remainder(), which dispatches
on the type of n.  Its result is therefore range-checked before being used
as an array index; a value outside 0..i-1 raises ValueError.
*/

static PyObject *
random_jumpahead(RandomObject *self, PyObject *n)
{
    long i, j;
    PyObject *iobj;
    PyObject *remobj;
    unsigned long *mt, tmp, nonzero;

    if (!PyInt_Check(n) && !PyLong_Check(n)) {
        PyErr_Format(PyExc_TypeError, "jumpahead requires an "
                     "integer, not '%s'",
                     Py_TYPE(n)->tp_name);
        return NULL;
    }

    mt = self->state;
    for (i = N-1; i > 1; i--) {
        iobj = PyInt_FromLong(i);
        if (iobj == NULL)
            return NULL;
        remobj = PyNumber_Remainder(n, iobj);
        Py_DECREF(iobj);
        if (remobj == NULL)
            return NULL;
        j = PyInt_AsLong(remobj);
        Py_DECREF(remobj);
        if (j == -1L && PyErr_Occurred())
            return NULL;
        /* never index outside the state array */
        if (j < 0 || j >= i) {
            PyErr_SetString(PyExc_ValueError,
                            "jumpahead: remainder is out of range");
            return NULL;
        }
        tmp = mt[i];
        mt[i] = mt[j];
        mt[j] = tmp;
    }

    nonzero = 0;
    for (i = 1; i < N; i++) {
        mt[i] += i+1;
        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
        nonzero |= mt[i];
    }

    /* Ensure the state is nonzero: in the unlikely event that mt[1] through
       mt[N-1] are all zero, set the MSB of mt[0] (see issue #14591). In the
       normal case, we fall back to the pre-issue 14591 behaviour for mt[0]. */
    if (nonzero) {
        mt[0] += 1;
        mt[0] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
    }
    else {
        mt[0] = 0x80000000UL;
    }

    self->index = N;
    Py_INCREF(Py_None);
    return Py_None;
}
/* getrandbits(k) implementation: build a Python long from k random bits.
 *
 * The bits are produced 32 at a time by genrand_int32() and stored
 * least-significant byte first; the last word is shifted right so that only
 * the bits still needed are kept.  k must be greater than zero.
 */
static PyObject *
random_getrandbits(RandomObject *self, PyObject *args)
{
    int k;
    int nbytes;
    int offset;
    int remaining;
    unsigned long word;
    unsigned char *buffer;
    PyObject *value;

    if (!PyArg_ParseTuple(args, "i:getrandbits", &k))
        return NULL;

    if (k <= 0) {
        PyErr_SetString(PyExc_ValueError,
                        "number of bits must be greater than zero");
        return NULL;
    }

    /* round the bit count up to whole 32-bit words, expressed in bytes */
    nbytes = ((k - 1) / 32 + 1) * 4;
    buffer = (unsigned char *)PyMem_Malloc(nbytes);
    if (buffer == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    /* Fill-out whole words, byte-by-byte to avoid endianness issues */
    remaining = k;
    for (offset = 0; offset < nbytes; offset += 4) {
        word = genrand_int32(self);
        if (remaining < 32)
            word >>= (32 - remaining);
        buffer[offset]     = (unsigned char)word;
        buffer[offset + 1] = (unsigned char)(word >> 8);
        buffer[offset + 2] = (unsigned char)(word >> 16);
        buffer[offset + 3] = (unsigned char)(word >> 24);
        remaining -= 32;
    }

    /* little endian order to match bytearray assignment order */
    value = _PyLong_FromByteArray(buffer, nbytes, 1, 0);
    PyMem_Free(buffer);
    return value;
}
/* tp_new for Random_Type.
 *
 * Allocates an instance through type->tp_alloc and seeds it by calling
 * random_seed() with the constructor arguments.  Keyword arguments are
 * rejected only when the exact base type is being instantiated.
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
random_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    RandomObject *obj;
    PyObject *seeded;

    if (type == &Random_Type) {
        if (!_PyArg_NoKeywords("Random()", kwds))
            return NULL;
    }

    obj = (RandomObject *)type->tp_alloc(type, 0);
    if (obj == NULL)
        return NULL;

    seeded = random_seed(obj, args);
    if (seeded != NULL) {
        /* The seed result itself is not needed, only its success. */
        Py_DECREF(seeded);
        return (PyObject *)obj;
    }

    Py_DECREF(obj);
    return NULL;
}
/* Method table for the Random type (referenced from the tp_methods slot
   of Random_Type).  Each entry gives the Python-visible name, the C
   implementation, the calling-convention flag and the docstring. */
static PyMethodDef random_methods[] = {
{"random", (PyCFunction)random_random, METH_NOARGS,
PyDoc_STR("random() -> x in the interval [0, 1).")},
{"seed", (PyCFunction)random_seed, METH_VARARGS,
PyDoc_STR("seed([n]) -> None. Defaults to current time.")},
{"getstate", (PyCFunction)random_getstate, METH_NOARGS,
PyDoc_STR("getstate() -> tuple containing the current state.")},
{"setstate", (PyCFunction)random_setstate, METH_O,
PyDoc_STR("setstate(state) -> None. Restores generator state.")},
{"jumpahead", (PyCFunction)random_jumpahead, METH_O,
PyDoc_STR("jumpahead(int) -> None. Create new state from "
"existing state and integer.")},
{"getrandbits", (PyCFunction)random_getrandbits, METH_VARARGS,
PyDoc_STR("getrandbits(k) -> x. Generates a long int with "
"k random bits.")},
{NULL, NULL} /* sentinel */
};
/* Docstring attached to the Random type via tp_doc. */
PyDoc_STRVAR(random_doc,
"Random() -> create a random number generator with its own internal state.");
/* Type object for "_random.Random".
   - Py_TPFLAGS_BASETYPE allows the type to be subclassed.
   - Instances are created by random_new (tp_new) and released with
     _PyObject_Del (tp_free).
   - Slots set to 0 are not provided by this file. */
static PyTypeObject Random_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_random.Random", /*tp_name*/
sizeof(RandomObject), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
0, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
PyObject_GenericGetAttr, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
random_doc, /*tp_doc*/
0, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
random_methods, /*tp_methods*/
0, /*tp_members*/
0, /*tp_getset*/
0, /*tp_base*/
0, /*tp_dict*/
0, /*tp_descr_get*/
0, /*tp_descr_set*/
0, /*tp_dictoffset*/
0, /*tp_init*/
0, /*tp_alloc*/
random_new, /*tp_new*/
_PyObject_Del, /*tp_free*/
0, /*tp_is_gc*/
};
PyDoc_STRVAR(module_doc,
"Module implements the Mersenne Twister random number generator.");

/* Module initialisation for "_random".
 *
 * Readies Random_Type, creates the module (it has no module-level
 * functions) and publishes the type as "Random".  On any failure the
 * function returns early, leaving the pending exception for the import
 * machinery.
 */
PyMODINIT_FUNC
init_random(void)
{
    PyObject *m;

    if (PyType_Ready(&Random_Type) < 0)
        return;
    m = Py_InitModule3("_random", NULL, module_doc);
    if (m == NULL)
        return;

    /* PyModule_AddObject steals the reference only on success, so the
       reference taken here must be dropped again if the call fails. */
    Py_INCREF(&Random_Type);
    if (PyModule_AddObject(m, "Random", (PyObject *)&Random_Type) < 0)
        Py_DECREF(&Random_Type);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,112 @@
#include "Python.h"
/* Wrap PyObject_GET_WEAKREFS_LISTPTR and cast its result to
   PyWeakReference ** so callers can dereference it to reach the first
   weak reference of object `o`. */
#define GET_WEAKREFS_LISTPTR(o) \
((PyWeakReference **) PyObject_GET_WEAKREFS_LISTPTR(o))
PyDoc_STRVAR(weakref_getweakrefcount__doc__,
"getweakrefcount(object) -- return the number of weak references\n"
"to 'object'.");

/* Return the number of weak references to `object` as a Python int.
   Objects whose type does not support weak references report 0. */
static PyObject *
weakref_getweakrefcount(PyObject *self, PyObject *object)
{
    PyWeakReference **head;

    if (!PyType_SUPPORTS_WEAKREFS(Py_TYPE(object)))
        return PyInt_FromLong(0);

    head = GET_WEAKREFS_LISTPTR(object);
    return PyInt_FromSsize_t(_PyWeakref_GetWeakrefCount(*head));
}
PyDoc_STRVAR(weakref_getweakrefs__doc__,
"getweakrefs(object) -- return a list of all weak reference objects\n"
"that point to 'object'.");

/* Build a new list holding every weak reference object that points to
   `object`.  Types without weak reference support yield an empty list.
   Returns NULL if the list cannot be allocated. */
static PyObject *
weakref_getweakrefs(PyObject *self, PyObject *object)
{
    PyWeakReference **head;
    PyWeakReference *ref;
    Py_ssize_t total, idx;
    PyObject *refs;

    if (!PyType_SUPPORTS_WEAKREFS(Py_TYPE(object)))
        return PyList_New(0);

    head = GET_WEAKREFS_LISTPTR(object);
    total = _PyWeakref_GetWeakrefCount(*head);
    refs = PyList_New(total);
    if (refs == NULL)
        return NULL;

    /* Walk the wr_next chain; the list takes its own reference to
       each entry. */
    ref = *head;
    for (idx = 0; idx < total; idx++, ref = ref->wr_next) {
        Py_INCREF(ref);
        PyList_SET_ITEM(refs, idx, (PyObject *) ref);
    }
    return refs;
}
PyDoc_STRVAR(weakref_proxy__doc__,
"proxy(object[, callback]) -- create a proxy object that weakly\n"
"references 'object'.  'callback', if given, is called with a\n"
"reference to the proxy when 'object' is about to be finalized.");

/* Unpack (object[, callback]) and delegate to PyWeakref_NewProxy.
   `callback` stays NULL when the caller omits it. */
static PyObject *
weakref_proxy(PyObject *self, PyObject *args)
{
    PyObject *object;
    PyObject *callback = NULL;

    if (!PyArg_UnpackTuple(args, "proxy", 1, 2, &object, &callback))
        return NULL;
    return PyWeakref_NewProxy(object, callback);
}
/* Module-level function table passed to Py_InitModule3 in init_weakref. */
static PyMethodDef
weakref_functions[] = {
{"getweakrefcount", weakref_getweakrefcount, METH_O,
weakref_getweakrefcount__doc__},
{"getweakrefs", weakref_getweakrefs, METH_O,
weakref_getweakrefs__doc__},
{"proxy", weakref_proxy, METH_VARARGS,
weakref_proxy__doc__},
{NULL, NULL, 0, NULL}
};
/* Module initialisation for "_weakref".
 *
 * Creates the module and exports the weak reference types.  The
 * reference type is published twice, as "ref" and "ReferenceType".
 * A reference is taken before each PyModule_AddObject call.
 */
PyMODINIT_FUNC
init_weakref(void)
{
    PyObject *module;

    module = Py_InitModule3("_weakref", weakref_functions,
                            "Weak-reference support module.");
    if (module == NULL)
        return;

    Py_INCREF(&_PyWeakref_RefType);
    PyModule_AddObject(module, "ref",
                       (PyObject *) &_PyWeakref_RefType);

    Py_INCREF(&_PyWeakref_RefType);
    PyModule_AddObject(module, "ReferenceType",
                       (PyObject *) &_PyWeakref_RefType);

    Py_INCREF(&_PyWeakref_ProxyType);
    PyModule_AddObject(module, "ProxyType",
                       (PyObject *) &_PyWeakref_ProxyType);

    Py_INCREF(&_PyWeakref_CallableProxyType);
    PyModule_AddObject(module, "CallableProxyType",
                       (PyObject *) &_PyWeakref_CallableProxyType);
}

View File

@ -0,0 +1,176 @@
/*
* Copyright (C) 1995, 1996, 1997, 1998, and 1999 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
 * Fallback definitions used only when HAVE_GETADDRINFO is not defined:
 * the EAI_* error codes and AI_* flag values for the emulated
 * getaddrinfo().  Any pre-existing definitions are removed first so the
 * values below are the only ones in effect.
 */
#ifndef HAVE_GETADDRINFO
/*
* Error return codes from getaddrinfo()
*/
#ifdef EAI_ADDRFAMILY
/* If this is defined, there is a conflicting implementation
in the C library, which can't be used for some reason.
Make sure it won't interfere with this emulation. */
#undef EAI_ADDRFAMILY
#undef EAI_AGAIN
#undef EAI_BADFLAGS
#undef EAI_FAIL
#undef EAI_FAMILY
#undef EAI_MEMORY
#undef EAI_NODATA
#undef EAI_NONAME
#undef EAI_SERVICE
#undef EAI_SOCKTYPE
#undef EAI_SYSTEM
#undef EAI_BADHINTS
#undef EAI_PROTOCOL
#undef EAI_MAX
/* Redirect calls to the emulation so they cannot reach the C library's
   own getaddrinfo. */
#undef getaddrinfo
#define getaddrinfo fake_getaddrinfo
#endif /* EAI_ADDRFAMILY */
#define EAI_ADDRFAMILY 1 /* address family for hostname not supported */
#define EAI_AGAIN 2 /* temporary failure in name resolution */
#define EAI_BADFLAGS 3 /* invalid value for ai_flags */
#define EAI_FAIL 4 /* non-recoverable failure in name resolution */
#define EAI_FAMILY 5 /* ai_family not supported */
#define EAI_MEMORY 6 /* memory allocation failure */
#define EAI_NODATA 7 /* no address associated with hostname */
#define EAI_NONAME 8 /* hostname nor servname provided, or not known */
#define EAI_SERVICE 9 /* servname not supported for ai_socktype */
#define EAI_SOCKTYPE 10 /* ai_socktype not supported */
#define EAI_SYSTEM 11 /* system error returned in errno */
#define EAI_BADHINTS 12
#define EAI_PROTOCOL 13
#define EAI_MAX 14
/*
* Flag values for getaddrinfo()
*/
#ifdef AI_PASSIVE
#undef AI_PASSIVE
#undef AI_CANONNAME
#undef AI_NUMERICHOST
#undef AI_MASK
#undef AI_ALL
#undef AI_V4MAPPED_CFG
#undef AI_ADDRCONFIG
#undef AI_V4MAPPED
#undef AI_DEFAULT
#endif /* AI_PASSIVE */
#define AI_PASSIVE 0x00000001 /* get address to use bind() */
#define AI_CANONNAME 0x00000002 /* fill ai_canonname */
#define AI_NUMERICHOST 0x00000004 /* prevent name resolution */
/* valid flags for addrinfo */
#define AI_MASK (AI_PASSIVE | AI_CANONNAME | AI_NUMERICHOST)
#define AI_ALL 0x00000100 /* IPv6 and IPv4-mapped (with AI_V4MAPPED) */
#define AI_V4MAPPED_CFG 0x00000200 /* accept IPv4-mapped if kernel supports */
#define AI_ADDRCONFIG 0x00000400 /* only if any address is assigned */
#define AI_V4MAPPED 0x00000800 /* accept IPv4-mapped IPv6 address */
/* special recommended flags for getipnodebyname */
#define AI_DEFAULT (AI_V4MAPPED_CFG | AI_ADDRCONFIG)
#endif /* !HAVE_GETADDRINFO */
/*
 * Fallback constants used only when HAVE_GETNAMEINFO is not defined.
 * Each group is defined only if its first macro is still undefined.
 */
#ifndef HAVE_GETNAMEINFO
/*
* Constants for getnameinfo()
*/
#ifndef NI_MAXHOST
#define NI_MAXHOST 1025
#define NI_MAXSERV 32
#endif /* !NI_MAXHOST */
/*
* Flag values for getnameinfo()
*/
#ifndef NI_NOFQDN
#define NI_NOFQDN 0x00000001
#define NI_NUMERICHOST 0x00000002
#define NI_NAMEREQD 0x00000004
#define NI_NUMERICSERV 0x00000008
#define NI_DGRAM 0x00000010
#endif /* !NI_NOFQDN */
#endif /* !HAVE_GETNAMEINFO */
/* Replacement declaration of struct addrinfo, compiled only when
   HAVE_ADDRINFO is not defined.  Entries form a singly linked list
   through ai_next. */
#ifndef HAVE_ADDRINFO
struct addrinfo {
int ai_flags; /* AI_PASSIVE, AI_CANONNAME */
int ai_family; /* PF_xxx */
int ai_socktype; /* SOCK_xxx */
int ai_protocol; /* 0 or IPPROTO_xxx for IPv4 and IPv6 */
size_t ai_addrlen; /* length of ai_addr */
char *ai_canonname; /* canonical name for hostname */
struct sockaddr *ai_addr; /* binary address */
struct addrinfo *ai_next; /* next structure in linked list */
};
#endif /* !HAVE_ADDRINFO */
#ifndef HAVE_SOCKADDR_STORAGE
/*
* RFC 2553: protocol-independent placeholder for socket addresses
*/
/* Intended total size of the structure, in bytes. */
#define _SS_MAXSIZE 128
/* Alignment unit: size of PY_LONG_LONG when available, else double. */
#ifdef HAVE_LONG_LONG
#define _SS_ALIGNSIZE (sizeof(PY_LONG_LONG))
#else
#define _SS_ALIGNSIZE (sizeof(double))
#endif /* HAVE_LONG_LONG */
/* Padding between the two leading header bytes and the aligned member. */
#define _SS_PAD1SIZE (_SS_ALIGNSIZE - sizeof(u_char) * 2)
/* Remaining padding after the header, first pad and aligned member. */
#define _SS_PAD2SIZE (_SS_MAXSIZE - sizeof(u_char) * 2 - \
_SS_PAD1SIZE - _SS_ALIGNSIZE)
struct sockaddr_storage {
/* The header is either a length byte plus a one-byte family, or a
   single unsigned short family, depending on HAVE_SOCKADDR_SA_LEN. */
#ifdef HAVE_SOCKADDR_SA_LEN
unsigned char ss_len; /* address length */
unsigned char ss_family; /* address family */
#else
unsigned short ss_family; /* address family */
#endif /* HAVE_SOCKADDR_SA_LEN */
char __ss_pad1[_SS_PAD1SIZE];
#ifdef HAVE_LONG_LONG
PY_LONG_LONG __ss_align; /* force desired structure storage alignment */
#else
double __ss_align; /* force desired structure storage alignment */
#endif /* HAVE_LONG_LONG */
char __ss_pad2[_SS_PAD2SIZE];
};
#endif /* !HAVE_SOCKADDR_STORAGE */
/* Prototype for freehostent(), given C linkage when compiled as C++.
   Py_PROTO wraps the parameter list; its definition is not in this
   header. */
#ifdef __cplusplus
extern "C" {
#endif
extern void freehostent Py_PROTO((struct hostent *));
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,779 @@
#include "Python.h"
#include "import.h"
#include "cStringIO.h"
#include "structmember.h"
/* Module-level docstring; passed to Py_InitModule4 in initcStringIO. */
PyDoc_STRVAR(cStringIO_module_documentation,
"A simple fast partial StringIO replacement.\n"
"\n"
"This module provides a simple useful replacement for\n"
"the StringIO module that is written in C. It does not provide the\n"
"full generality of StringIO, but it provides enough for most\n"
"applications and is especially useful in conjunction with the\n"
"pickle module.\n"
"\n"
"Usage:\n"
"\n"
" from cStringIO import StringIO\n"
"\n"
" an_output_stream=StringIO()\n"
" an_output_stream.write(some_stuff)\n"
" ...\n"
" value=an_output_stream.getvalue()\n"
"\n"
" an_input_stream=StringIO(a_string)\n"
" spam=an_input_stream.readline()\n"
" spam=an_input_stream.read(5)\n"
" an_input_stream.seek(0) # OK, start over\n"
" spam=an_input_stream.read() # and read it all\n"
" \n"
"If someone else wants to provide a more complete implementation,\n"
"go for it. :-) \n"
"\n"
"cStringIO.c,v 1.29 1999/06/15 14:10:27 jim Exp\n");
/* Declaration for file-like objects that manage data as strings
The IOobject type should be thought of as a common base type for
Iobjects, which provide input (read-only) StringIO objects and
Oobjects, which provide read-write objects. Most of the methods
depend only on common data.
*/
typedef struct {
PyObject_HEAD
char *buf; /* data buffer; NULL means the object is closed */
Py_ssize_t pos, string_size; /* current position and length of valid data */
} IOobject;
/* Cast any of the StringIO object pointers to the common IOobject view. */
#define IOOOBJECT(O) ((IOobject*)(O))
/* Declarations for objects of type StringO */
/* The leading members repeat IOobject's layout so an Oobject can be
   accessed through an IOobject pointer. */
typedef struct { /* Subtype of IOobject */
PyObject_HEAD
char *buf;
Py_ssize_t pos, string_size;
Py_ssize_t buf_size; /* allocated size of buf */
int softspace; /* exposed as the "softspace" member */
} Oobject;
/* Declarations for objects of type StringI */
/* The leading members repeat IOobject's layout so an Iobject can be
   accessed through an IOobject pointer. */
typedef struct { /* Subtype of IOobject */
PyObject_HEAD
char *buf;
Py_ssize_t pos, string_size;
Py_buffer pbuf; /* buffer view that buf points into; released on close/dealloc */
} Iobject;
/* IOobject (common) methods */
PyDoc_STRVAR(IO_flush__doc__, "flush(): does nothing.");

/* Return 1 if the object still has a buffer.  Otherwise set
   ValueError("I/O operation on closed file") and return 0. */
static int
IO__opencheck(IOobject *self) {
    if (self->buf != NULL)
        return 1;

    PyErr_SetString(PyExc_ValueError,
                    "I/O operation on closed file");
    return 0;
}
/* Getter for the "closed" attribute: True once buf is NULL, else False.
   Returns a new reference. */
static PyObject *
IO_get_closed(IOobject *self, void *closure)
{
    PyObject *flag = (self->buf == NULL) ? Py_True : Py_False;

    Py_INCREF(flag);
    return flag;
}
/* Computed attributes shared by both StringIO types: a read-only
   "closed" attribute served by IO_get_closed (no setter). */
static PyGetSetDef file_getsetlist[] = {
{"closed", (getter)IO_get_closed, NULL, "True if the file is closed"},
{0},
};
/* flush(): only checks that the object is open, then returns None. */
static PyObject *
IO_flush(IOobject *self, PyObject *unused) {
    if (!IO__opencheck(self))
        return NULL;
    Py_RETURN_NONE;
}
PyDoc_STRVAR(IO_getval__doc__,
"getvalue([use_pos]) -- Get the string value."
"\n"
"If use_pos is specified and is a true value, then the string returned\n"
"will include only the text up to the current file position.\n");

/* C-level getvalue (exported through the CAPI table).
 *
 * Returns a new string holding the buffer contents up to the current
 * position, or NULL with ValueError set if the object is closed.
 *
 * seek() may move pos beyond string_size, and beyond the allocated
 * buffer, so the length is clamped to string_size in the same way
 * IO_getval does; otherwise the copy would read past the valid data.
 */
static PyObject *
IO_cgetval(PyObject *self) {
    IOobject *io = IOOOBJECT(self);
    Py_ssize_t size;

    if (!IO__opencheck(io)) return NULL;
    assert(io->pos >= 0);

    size = io->pos;
    if (size > io->string_size)
        size = io->string_size;
    return PyString_FromStringAndSize(io->buf, size);
}
/* getvalue([use_pos]): return the stored data as a new string.
   With a true use_pos the result stops at the current position, but
   never extends past string_size. */
static PyObject *
IO_getval(IOobject *self, PyObject *args) {
    PyObject *use_pos = Py_None;
    Py_ssize_t length;
    int truth;

    if (!IO__opencheck(self)) return NULL;
    if (!PyArg_UnpackTuple(args,"getval", 0, 1,&use_pos)) return NULL;

    truth = PyObject_IsTrue(use_pos);
    if (truth < 0)
        return NULL;

    /* Default to the whole string; shorten only when requested and the
       position lies inside the valid data. */
    length = self->string_size;
    if (truth && self->pos <= self->string_size)
        length = self->pos;

    assert(self->pos >= 0);
    return PyString_FromStringAndSize(self->buf, length);
}
PyDoc_STRVAR(IO_isatty__doc__, "isatty(): always returns 0");

/* isatty(): returns False for any open object; closed objects raise. */
static PyObject *
IO_isatty(IOobject *self, PyObject *unused) {
    if (!IO__opencheck(self))
        return NULL;
    Py_RETURN_FALSE;
}
PyDoc_STRVAR(IO_read__doc__,
"read([s]) -- Read s characters, or the rest of the string");

/* C-level read (exported through the CAPI table).
 *
 * Points *output at the current position inside the buffer (no copy)
 * and advances pos by the number of bytes made available.  A negative
 * n, or an n larger than what remains, means "everything that remains".
 * Returns the byte count, or -1 with an exception set when the object
 * is closed or the count does not fit in an int.
 */
static int
IO_cread(PyObject *self, char **output, Py_ssize_t  n) {
    IOobject *io = IOOOBJECT(self);
    Py_ssize_t avail;

    if (!IO__opencheck(io)) return -1;
    assert(io->pos >= 0);
    assert(io->string_size >= 0);

    /* Nothing is available when pos sits at or beyond the end. */
    avail = io->string_size - io->pos;
    if (avail < 0)
        avail = 0;
    if (n < 0 || n > avail)
        n = avail;

    if (n > INT_MAX) {
        PyErr_SetString(PyExc_OverflowError,
                        "length too large");
        return -1;
    }

    *output = io->buf + io->pos;
    io->pos += n;
    return (int)n;
}
/* read([s]): Python wrapper around IO_cread; copies the bytes it
   exposes into a new string.  The size defaults to -1 (read the rest). */
static PyObject *
IO_read(IOobject *self, PyObject *args) {
    Py_ssize_t want = -1;
    char *data = NULL;
    int got;

    if (!PyArg_ParseTuple(args, "|n:read", &want)) return NULL;

    got = IO_cread((PyObject*)self, &data, want);
    if (got < 0)
        return NULL;

    return PyString_FromStringAndSize(data, got);
}
PyDoc_STRVAR(IO_readline__doc__, "readline() -- Read one line");

/* C-level readline (exported through the CAPI table).
 *
 * Points *output at the current position and advances pos past the
 * next '\n' (inclusive), or to the end of the data when no newline
 * remains.  The length is capped at INT_MAX.  Returns the line length,
 * or -1 with ValueError set if the object is closed.
 */
static int
IO_creadline(PyObject *self, char **output) {
    IOobject *io = IOOOBJECT(self);
    char *line, *cursor, *limit;
    Py_ssize_t count;

    if (!IO__opencheck(io)) return -1;

    line = io->buf + io->pos;
    limit = io->buf + io->string_size;

    for (cursor = line; cursor < limit; cursor++) {
        if (*cursor == '\n') {
            cursor++;           /* the newline belongs to the line */
            break;
        }
    }

    count = cursor - line;
    if (count > INT_MAX)
        count = INT_MAX;

    *output = line;

    assert(io->pos <= PY_SSIZE_T_MAX - count);
    assert(io->pos >= 0);
    assert(io->string_size >= 0);

    io->pos += count;
    return (int)count;
}
/* readline([size]): return the next line as a new string.
   `args` may be NULL (as when called from IO_iternext), meaning no
   size limit.  When a non-negative size shorter than the line is
   given, the line is cut to that size and pos is moved back by the
   part not returned. */
static PyObject *
IO_readline(IOobject *self, PyObject *args) {
    int got, limit = -1;
    char *line;

    if (args != NULL && !PyArg_ParseTuple(args, "|i:readline", &limit))
        return NULL;

    got = IO_creadline((PyObject*)self, &line);
    if (got < 0)
        return NULL;

    if (limit >= 0 && limit < got) {
        self->pos -= got - limit;
        got = limit;
    }

    assert(IOOOBJECT(self)->pos >= 0);
    return PyString_FromStringAndSize(line, got);
}
PyDoc_STRVAR(IO_readlines__doc__, "readlines() -- Read all lines");

/* readlines([hint]): collect lines into a new list until the data is
   exhausted or, when hint > 0, until at least `hint` bytes have been
   gathered.  Returns NULL and discards the partial list on error. */
static PyObject *
IO_readlines(IOobject *self, PyObject *args) {
    PyObject *lines;
    Py_ssize_t hint = 0, total = 0;

    if (!PyArg_ParseTuple(args, "|n:readlines", &hint)) return NULL;

    lines = PyList_New(0);
    if (lines == NULL)
        return NULL;

    for (;;) {
        char *text;
        PyObject *entry;
        int status;
        int size = IO_creadline((PyObject*)self, &text);

        if (size < 0)
            goto fail;
        if (size == 0)
            break;                      /* end of data */

        entry = PyString_FromStringAndSize(text, size);
        if (entry == NULL)
            goto fail;

        /* The list holds its own reference after a successful append. */
        status = PyList_Append(lines, entry);
        Py_DECREF(entry);
        if (status == -1)
            goto fail;

        total += size;
        if (hint > 0 && total >= hint)
            break;
    }
    return lines;

fail:
    Py_DECREF(lines);
    return NULL;
}
PyDoc_STRVAR(IO_reset__doc__,
"reset() -- Reset the file position to the beginning");

/* reset(): move the position back to 0 and return None. */
static PyObject *
IO_reset(IOobject *self, PyObject *unused) {
    if (!IO__opencheck(self))
        return NULL;

    self->pos = 0;
    Py_RETURN_NONE;
}
PyDoc_STRVAR(IO_tell__doc__, "tell() -- get the current position.");

/* tell(): return the current position as a Python int. */
static PyObject *
IO_tell(IOobject *self, PyObject *unused) {
    if (IO__opencheck(self)) {
        assert(self->pos >= 0);
        return PyInt_FromSsize_t(self->pos);
    }
    return NULL;
}
PyDoc_STRVAR(IO_truncate__doc__,
"truncate(): truncate the file at the current position.");

/* truncate([size]): shrink string_size to `size` (default: the current
   position) if it is currently larger, then place the position at the
   resulting end.  A negative size raises IOError with errno EINVAL. */
static PyObject *
IO_truncate(IOobject *self, PyObject *args) {
    Py_ssize_t cut = -1;

    if (!IO__opencheck(self)) return NULL;
    if (!PyArg_ParseTuple(args, "|n:truncate", &cut)) return NULL;

    /* No argument passed, truncate to current position */
    if (PyTuple_Size(args) == 0)
        cut = self->pos;

    if (cut < 0) {
        errno = EINVAL;
        PyErr_SetFromErrno(PyExc_IOError);
        return NULL;
    }

    if (cut < self->string_size)
        self->string_size = cut;
    self->pos = self->string_size;

    Py_RETURN_NONE;
}
/* tp_iternext: yield the next line via IO_readline; an empty line
   marks exhaustion and is turned into StopIteration. */
static PyObject *
IO_iternext(Iobject *self)
{
    PyObject *line = IO_readline((IOobject *)self, NULL);

    if (line == NULL)
        return NULL;
    if (PyString_GET_SIZE(line) != 0)
        return line;

    Py_DECREF(line);
    PyErr_SetNone(PyExc_StopIteration);
    return NULL;
}
/* Read-write object methods */

PyDoc_STRVAR(IO_seek__doc__,
"seek(position)       -- set the current position\n"
"seek(position, mode) -- mode 0: absolute; 1: relative; 2: relative to EOF");

/* seek(position[, mode]): compute the new position from the mode
   (1: relative to the current position, 2: relative to string_size,
   anything else: absolute).  Negative results are clamped to 0; there
   is no upper clamp, so pos may end up beyond string_size. */
static PyObject *
IO_seek(Iobject *self, PyObject *args) {
    Py_ssize_t target;
    int mode = 0;

    if (!IO__opencheck(IOOOBJECT(self))) return NULL;
    if (!PyArg_ParseTuple(args, "n|i:seek", &target, &mode))
        return NULL;

    switch (mode) {
    case 2:
        target += self->string_size;
        break;
    case 1:
        target += self->pos;
        break;
    default:
        break;
    }

    self->pos = (target < 0) ? 0 : target;

    Py_RETURN_NONE;
}
PyDoc_STRVAR(O_write__doc__,
"write(s) -- Write a string to the file"
"\n\nNote (hack:) writing None resets the buffer");
/* C-level write (exported through the CAPI table).
 *
 * Copies `len` bytes from `c` into the buffer at the current position,
 * growing the buffer with realloc() when needed, and advances pos.
 * Returns `len` as an int, or -1 with an exception set if the object is
 * closed, `len` exceeds INT_MAX, the new position would overflow, or
 * memory cannot be obtained.
 */
static int
O_cwrite(PyObject *self, const char *c, Py_ssize_t len) {
Py_ssize_t newpos;
Oobject *oself;
char *newbuf;
if (!IO__opencheck(IOOOBJECT(self))) return -1;
oself = (Oobject *)self;
/* The return type is int, so larger lengths cannot be reported. */
if (len > INT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"length too large");
return -1;
}
assert(len >= 0);
/* Guard the addition below against Py_ssize_t overflow. */
if (oself->pos >= PY_SSIZE_T_MAX - len) {
PyErr_SetString(PyExc_OverflowError,
"new position too large");
return -1;
}
newpos = oself->pos + len;
if (newpos >= oself->buf_size) {
/* Grow: try doubling; fall back to exactly newpos + 1 when doubling
   is still too small or exceeds PY_SSIZE_T_MAX. */
size_t newsize = oself->buf_size;
newsize *= 2;
if (newsize <= (size_t)newpos || newsize > PY_SSIZE_T_MAX) {
assert(newpos < PY_SSIZE_T_MAX - 1);
newsize = newpos + 1;
}
/* Keep the old pointer until realloc succeeds, so a failure leaves
   the object usable. */
newbuf = (char*)realloc(oself->buf, newsize);
if (!newbuf) {
PyErr_SetString(PyExc_MemoryError,"out of memory");
return -1;
}
oself->buf_size = (Py_ssize_t)newsize;
oself->buf = newbuf;
}
if (oself->string_size < oself->pos) {
/* In case of overseek, pad with null bytes the buffer region between
the end of stream and the current position.
0 lo string_size hi
| |<---used--->|<----------available----------->|
| | <--to pad-->|<---to write---> |
0 buf position
*/
memset(oself->buf + oself->string_size, '\0',
(oself->pos - oself->string_size) * sizeof(char));
}
memcpy(oself->buf + oself->pos, c, len);
oself->pos = newpos;
/* Extend the logical length if the write went past the old end. */
if (oself->string_size < oself->pos) {
oself->string_size = oself->pos;
}
return (int)len;
}
/* write(s): parse one buffer-compatible argument, hand its bytes to
   O_cwrite, and release the buffer whether or not the write worked. */
static PyObject *
O_write(Oobject *self, PyObject *args) {
    Py_buffer data;
    int written;

    if (!PyArg_ParseTuple(args, "s*:write", &data)) return NULL;

    written = O_cwrite((PyObject*)self, data.buf, data.len);
    PyBuffer_Release(&data);

    if (written >= 0) {
        Py_RETURN_NONE;
    }
    return NULL;
}
PyDoc_STRVAR(O_close__doc__, "close(): explicitly release resources held.");

/* close(): free the buffer and zero all bookkeeping.  A NULL buf is
   what marks the object as closed for IO__opencheck. */
static PyObject *
O_close(Oobject *self, PyObject *unused) {
    free(self->buf);            /* free(NULL) is a no-op */
    self->buf = NULL;

    self->buf_size = 0;
    self->string_size = 0;
    self->pos = 0;

    Py_RETURN_NONE;
}
PyDoc_STRVAR(O_writelines__doc__,
"writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
"\n"
"Note that newlines are not added.  The sequence can be any iterable object\n"
"producing strings. This is equivalent to calling write() for each string.");

/* writelines(iterable): write every string produced by the iterable
   through O_cwrite.  Stops at the first item that is not a string or
   cannot be written and returns NULL with the exception set. */
static PyObject *
O_writelines(Oobject *self, PyObject *args) {
    PyObject *iter, *item;

    iter = PyObject_GetIter(args);
    if (iter == NULL)
        return NULL;

    for (;;) {
        char *data;
        Py_ssize_t size;
        int failed;

        item = PyIter_Next(iter);
        if (item == NULL)
            break;

        /* Short-circuit: the write is attempted only if the item could
           be viewed as a string. */
        failed = PyString_AsStringAndSize(item, &data, &size) == -1
                 || O_cwrite((PyObject *)self, data, size) == -1;
        if (failed) {
            Py_DECREF(iter);
            Py_DECREF(item);
            return NULL;
        }
        Py_DECREF(item);
    }
    Py_DECREF(iter);

    /* See if PyIter_Next failed */
    if (PyErr_Occurred())
        return NULL;

    Py_RETURN_NONE;
}
/* Method table for the read-write type (Otype): the methods shared with
   the read-only type plus close/write/writelines. */
static struct PyMethodDef O_methods[] = {
/* Common methods: */
{"flush", (PyCFunction)IO_flush, METH_NOARGS, IO_flush__doc__},
{"getvalue", (PyCFunction)IO_getval, METH_VARARGS, IO_getval__doc__},
{"isatty", (PyCFunction)IO_isatty, METH_NOARGS, IO_isatty__doc__},
{"read", (PyCFunction)IO_read, METH_VARARGS, IO_read__doc__},
{"readline", (PyCFunction)IO_readline, METH_VARARGS, IO_readline__doc__},
{"readlines", (PyCFunction)IO_readlines,METH_VARARGS, IO_readlines__doc__},
{"reset", (PyCFunction)IO_reset, METH_NOARGS, IO_reset__doc__},
{"seek", (PyCFunction)IO_seek, METH_VARARGS, IO_seek__doc__},
{"tell", (PyCFunction)IO_tell, METH_NOARGS, IO_tell__doc__},
{"truncate", (PyCFunction)IO_truncate, METH_VARARGS, IO_truncate__doc__},
/* Read-write StringIO specific methods: */
{"close", (PyCFunction)O_close, METH_NOARGS, O_close__doc__},
{"write", (PyCFunction)O_write, METH_VARARGS, O_write__doc__},
{"writelines", (PyCFunction)O_writelines, METH_O, O_writelines__doc__},
{NULL, NULL} /* sentinel */
};
/* Data attributes of the read-write type: "softspace" maps directly
   onto the int field of Oobject (flags 0). */
static PyMemberDef O_memberlist[] = {
{"softspace", T_INT, offsetof(Oobject, softspace), 0,
"flag indicating that a space needs to be printed; used by print"},
/* getattr(f, "closed") is implemented without this table */
{NULL} /* Sentinel */
};
/* tp_dealloc for Otype: release the data buffer, then the object. */
static void
O_dealloc(Oobject *self) {
    free(self->buf);            /* free(NULL) is a no-op */
    PyObject_Del(self);
}
PyDoc_STRVAR(Otype__doc__, "Simple type for output to strings.");
/* Type object for "cStringIO.StringO" (read-write objects).
   The object is its own iterator (PyObject_SelfIter) and yields lines
   through IO_iternext.  Trailing slots not listed are left
   zero-initialised. */
static PyTypeObject Otype = {
PyVarObject_HEAD_INIT(NULL, 0)
"cStringIO.StringO", /*tp_name*/
sizeof(Oobject), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
(destructor)O_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr */
0, /*tp_setattr */
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0 , /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro */
0, /*tp_setattro */
0, /*tp_as_buffer */
Py_TPFLAGS_DEFAULT, /*tp_flags*/
Otype__doc__, /*tp_doc */
0, /*tp_traverse */
0, /*tp_clear */
0, /*tp_richcompare */
0, /*tp_weaklistoffset */
PyObject_SelfIter, /*tp_iter */
(iternextfunc)IO_iternext, /*tp_iternext */
O_methods, /*tp_methods */
O_memberlist, /*tp_members */
file_getsetlist, /*tp_getset */
};
/* Create an empty read-write object whose buffer is allocated with
   `size` bytes (exported through the CAPI table).  Returns a new
   reference, or NULL with MemoryError set if the buffer cannot be
   allocated. */
static PyObject *
newOobject(int  size) {
    Oobject *obj = PyObject_New(Oobject, &Otype);

    if (obj == NULL)
        return NULL;

    obj->softspace = 0;
    obj->string_size = 0;
    obj->pos = 0;

    obj->buf = (char *)malloc(size);
    if (obj->buf != NULL) {
        obj->buf_size = size;
        return (PyObject*)obj;
    }

    /* buf is NULL here, so the deallocator triggered by the DECREF has
       nothing to free. */
    PyErr_SetString(PyExc_MemoryError,"out of memory");
    obj->buf_size = 0;
    Py_DECREF(obj);
    return NULL;
}
/* End of code for StringO objects */
/* -------------------------------------------------------- */

/* close() for read-only objects: release the buffer view and reset the
   bookkeeping.  A NULL buf marks the object as closed. */
static PyObject *
I_close(Iobject *self, PyObject *unused) {
    PyBuffer_Release(&self->pbuf);

    self->buf = NULL;
    self->string_size = 0;
    self->pos = 0;

    Py_RETURN_NONE;
}
/* Method table for the read-only type (Itype): the common methods plus
   close.  No write methods are provided; close reuses O_close's
   docstring. */
static struct PyMethodDef I_methods[] = {
/* Common methods: */
{"flush", (PyCFunction)IO_flush, METH_NOARGS, IO_flush__doc__},
{"getvalue", (PyCFunction)IO_getval, METH_VARARGS, IO_getval__doc__},
{"isatty", (PyCFunction)IO_isatty, METH_NOARGS, IO_isatty__doc__},
{"read", (PyCFunction)IO_read, METH_VARARGS, IO_read__doc__},
{"readline", (PyCFunction)IO_readline, METH_VARARGS, IO_readline__doc__},
{"readlines", (PyCFunction)IO_readlines,METH_VARARGS, IO_readlines__doc__},
{"reset", (PyCFunction)IO_reset, METH_NOARGS, IO_reset__doc__},
{"seek", (PyCFunction)IO_seek, METH_VARARGS, IO_seek__doc__},
{"tell", (PyCFunction)IO_tell, METH_NOARGS, IO_tell__doc__},
{"truncate", (PyCFunction)IO_truncate, METH_VARARGS, IO_truncate__doc__},
/* Read-only StringIO specific methods: */
{"close", (PyCFunction)I_close, METH_NOARGS, O_close__doc__},
{NULL, NULL}
};
/* tp_dealloc for Itype: release the buffer view held in pbuf, then free
   the object itself. */
static void
I_dealloc(Iobject *self) {
PyBuffer_Release(&self->pbuf);
PyObject_Del(self);
}
PyDoc_STRVAR(Itype__doc__,
"Simple type for treating strings as input file streams");
/* Type object for "cStringIO.StringI" (read-only objects).
   The object is its own iterator (PyObject_SelfIter) and yields lines
   through IO_iternext.  It has no tp_members table.  Trailing slots not
   listed are left zero-initialised. */
static PyTypeObject Itype = {
PyVarObject_HEAD_INIT(NULL, 0)
"cStringIO.StringI", /*tp_name*/
sizeof(Iobject), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
(destructor)I_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /* tp_getattr */
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
Itype__doc__, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter */
(iternextfunc)IO_iternext, /* tp_iternext */
I_methods, /* tp_methods */
0, /* tp_members */
file_getsetlist, /* tp_getset */
};
/* Create a read-only object over the data of `s` (exported through the
 * CAPI table).
 *
 * `s` is wrapped in a one-element tuple so PyArg_ParseTuple's "s*"
 * converter can be used to obtain a Py_buffer.  The view is stored in
 * the new object and released by I_close / I_dealloc.  Returns a new
 * reference, or NULL with an exception set.
 */
static PyObject *
newIobject(PyObject *s) {
    Iobject *obj;
    Py_buffer view;
    PyObject *packed;
    int parsed;

    packed = Py_BuildValue("(O)", s);
    if (packed == NULL)
        return NULL;
    parsed = PyArg_ParseTuple(packed, "s*:StringIO", &view);
    Py_DECREF(packed);
    if (!parsed)
        return NULL;

    obj = PyObject_New(Iobject, &Itype);
    if (obj != NULL) {
        obj->pbuf = view;
        obj->buf = view.buf;
        obj->string_size = view.len;
        obj->pos = 0;
        return (PyObject*)obj;
    }

    /* Allocation failed: nothing owns the view, release it here. */
    PyBuffer_Release(&view);
    return NULL;
}
/* End of code for StringI objects */
/* -------------------------------------------------------- */

PyDoc_STRVAR(IO_StringIO__doc__,
"StringIO([s]) -- Return a StringIO-like stream for reading or writing");

/* StringIO([s]): with an argument, return a read-only object over it;
   without one, return a read-write object with a 128-byte initial
   buffer. */
static PyObject *
IO_StringIO(PyObject *self, PyObject *args) {
    PyObject *s = NULL;

    if (!PyArg_UnpackTuple(args, "StringIO", 0, 1, &s)) return NULL;

    return (s != NULL) ? newIobject(s) : newOobject(128);
}
/* List of methods defined in the module */
/* Only the StringIO() factory is exposed at module level. */
static struct PyMethodDef IO_methods[] = {
{"StringIO", (PyCFunction)IO_StringIO,
METH_VARARGS, IO_StringIO__doc__},
{NULL, NULL} /* sentinel */
};
/* Initialization function for the module (*must* be called initcStringIO) */
/* Table of C entry points and type objects wrapped in a capsule by
   initcStringIO.  The entry order has to match the field order of
   struct PycStringIO_CAPI, which is declared outside this file
   (presumably in cStringIO.h -- confirm there). */
static struct PycStringIO_CAPI CAPI = {
IO_cread,
IO_creadline,
O_cwrite,
IO_cgetval,
newOobject,
newIobject,
&Itype,
&Otype,
};
#ifndef PyMODINIT_FUNC	/* declarations for DLL import/export */
#define PyMODINIT_FUNC void
#endif

/* Module initialisation for "cStringIO" (the name initcStringIO is
 * required by the import machinery).
 *
 * Creates the module, readies both types, publishes the C API table as
 * a capsule under "cStringIO_CAPI" and exports the types as
 * "InputType" and "OutputType".  On failure the function returns early
 * and leaves the pending exception in place.
 */
PyMODINIT_FUNC
initcStringIO(void) {
    PyObject *m, *d, *v;

    /* Create the module and add the functions */
    m = Py_InitModule4("cStringIO", IO_methods,
                       cStringIO_module_documentation,
                       (PyObject*)NULL,PYTHON_API_VERSION);
    if (m == NULL) return;

    /* Add some symbolic constants to the module */
    d = PyModule_GetDict(m);

    /* Export C API */
    Py_TYPE(&Itype)=&PyType_Type;
    Py_TYPE(&Otype)=&PyType_Type;
    if (PyType_Ready(&Otype) < 0) return;
    if (PyType_Ready(&Itype) < 0) return;

    /* PyCapsule_New can fail; a NULL value must not be handed to
       PyDict_SetItemString. */
    v = PyCapsule_New(&CAPI, PycStringIO_CAPSULE_NAME, NULL);
    if (v == NULL) return;
    PyDict_SetItemString(d,"cStringIO_CAPI", v);
    Py_DECREF(v);

    /* Export Types */
    PyDict_SetItemString(d,"InputType",  (PyObject*)&Itype);
    PyDict_SetItemString(d,"OutputType", (PyObject*)&Otype);

    /* Maybe make certain warnings go away */
    if (0) PycString_IMPORT;
}

View File

@ -0,0 +1,79 @@
To generate or modify mapping headers
-------------------------------------
Mapping headers are imported from CJKCodecs in pre-generated form.
If you need to tweak them or add something to them, please look at the
tools/ subdirectory of the CJKCodecs distribution.
Notes on implementation characteristics of each codec
-----------------------------------------------------
1) Big5 codec
The big5 codec maps the following characters as cp950 does, rather
than conforming to Unicode.org's mapping, which maps them to 0xFFFD.
BIG5 Unicode Description
0xA15A 0x2574 SPACING UNDERSCORE
0xA1C3 0xFFE3 SPACING HEAVY OVERSCORE
0xA1C5 0x02CD SPACING HEAVY UNDERSCORE
0xA1FE 0xFF0F LT DIAG UP RIGHT TO LOW LEFT
0xA240 0xFF3C LT DIAG UP LEFT TO LOW RIGHT
0xA2CC 0x5341 HANGZHOU NUMERAL TEN
0xA2CE 0x5345 HANGZHOU NUMERAL THIRTY
Because Unicode 0x5341, 0x5345, 0xFF0F and 0xFF3C are already mapped to
other big5 codes, roundtrip compatibility is not guaranteed for
them.
2) cp932 codec
To conform to Windows's real mapping, the cp932 codec maps the following
codepoints in addition to the official cp932 mapping.
CP932 Unicode Description
0x80 0x80 UNDEFINED
0xA0 0xF8F0 UNDEFINED
0xFD 0xF8F1 UNDEFINED
0xFE 0xF8F2 UNDEFINED
0xFF 0xF8F3 UNDEFINED
3) euc-jisx0213 codec
The euc-jisx0213 codec maps JIS X 0213 Plane 1 code 0x2140 into
unicode U+FF3C instead of U+005C as on unicode.org's mapping.
Because euc-jisx0213 has REVERSE SOLIDUS on 0x5c already and A140
is shown as a full width character, mapping to U+FF3C can make
more sense.
The euc-jisx0213 codec is able to decode JIS X 0212 codes on
codeset 2. Because JIS X 0212 and JIS X 0213 Plane 2 do not
overlap each other, this does not affect standard conformance
(and JIS X 0213 Plane 2 is intended to be used this way.) In encoding
sessions, the codec will try to encode kanji characters in this
order:
JIS X 0213 Plane 1 -> JIS X 0213 Plane 2 -> JIS X 0212
4) euc-jp codec
The euc-jp codec is a compatibility instance on these points:
- U+FF3C FULLWIDTH REVERSE SOLIDUS is mapped to EUC-JP A1C0 (vice versa)
- U+00A5 YEN SIGN is mapped to EUC-JP 0x5c. (one way)
- U+203E OVERLINE is mapped to EUC-JP 0x7e. (one way)
5) shift-jis codec
The shift-jis codec maps the 0x20-0x7e area to U+20-U+7E directly
instead of using JIS X 0201, for compatibility. The differences are:
- U+005C REVERSE SOLIDUS is mapped to SHIFT-JIS 0x5c.
- U+007E TILDE is mapped to SHIFT-JIS 0x7e.
- U+FF3C FULL-WIDTH REVERSE SOLIDUS is mapped to SHIFT-JIS 815f.

View File

@ -0,0 +1,444 @@
/*
* _codecs_cn.c: Codecs collection for Mainland Chinese encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#include "cjkcodecs.h"
#include "mappings_cn.h"
/**
* hz is predefined as 100 on AIX, so we undefine it to avoid a
* conflict with the hz codec.
*/
#ifdef _AIX
#undef hz
#endif
/* GBK and GB2312 map differently in few code points that are listed below:
*
* gb2312 gbk
* A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT
* A1AA U+2015 HORIZONTAL BAR U+2014 EM DASH
* A844 undefined U+2015 HORIZONTAL BAR
*/
/* GBK_DECODE(dc1, dc2, assi): store into `assi` the Unicode value for the
 * two-byte GBK sequence (dc1, dc2).  The three code points listed in the
 * table above are assigned literally; any other pair is looked up in the
 * gb2312 table (with bit 7 of both bytes toggled by ^ 0x80) and then in the
 * gbkext table.  The expansion ends in an open if/else chain, so every use
 * in this file is followed by an `else` that handles the unmapped case.
 * NOTE(review): relies on TRYMAP_DEC expanding to an `if (...)` head --
 * confirm in cjkcodecs.h. */
#define GBK_DECODE(dc1, dc2, assi) \
if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \
else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \
else TRYMAP_DEC(gbkext, assi, dc1, dc2);
/* GBK_ENCODE(code, assi): store into `assi` the double-byte code for the
 * Unicode value `code`.  U+2014, U+2015 and U+00B7 get literal GBK codes;
 * U+30FB is explicitly excluded from the gbcommon lookup; everything else
 * is looked up in gbcommon.  Like GBK_DECODE, the expansion is an open
 * if/else chain and callers append an `else` for the unmapped case.
 * NOTE(review): TRYMAP_ENC_COND is presumably the expression form of
 * TRYMAP_ENC -- confirm in cjkcodecs.h. */
#define GBK_ENCODE(code, assi) \
if ((code) == 0x2014) (assi) = 0xa1aa; \
else if ((code) == 0x2015) (assi) = 0xa844; \
else if ((code) == 0x00b7) (assi) = 0xa1a4; \
else if ((code) != 0x30fb && TRYMAP_ENC_COND(gbcommon, assi, code));
/*
* GB2312 codec
*/
/*
 * GB2312 encoder.
 *
 * Characters below 0x80 are written as one byte; any other character is
 * looked up in the gbcommon table and written as two bytes with bit 7 set
 * on each.  Returns 0 once the input is exhausted and 1 when the current
 * character has no GB2312 code (no table entry, or an entry whose MSB marks
 * it as GBK-only).
 *
 * NOTE(review): IN1, WRITE1, OUT1/OUT2, NEXT, REQUIRE_OUTBUF, UCS4INVALID
 * and TRYMAP_ENC are helper macros defined outside this section; some of
 * them presumably return early (e.g. on a short output buffer) -- confirm
 * in cjkcodecs.h.
 */
ENCODER(gb2312)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
/* ASCII range: copy through as a single byte. */
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* Look the character up; the `else` below belongs to the macro's `if`. */
TRYMAP_ENC(gbcommon, code, c);
else return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;
/* Emit lead and trail byte, each with the high bit set. */
OUT1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80)
NEXT(1, 2)
}
return 0;
}
/*
 * GB2312 decoder.
 *
 * Bytes below 0x80 are copied through as one output character.  Any other
 * byte is treated as the lead byte of a two-byte sequence that is looked up
 * in the gb2312 table after toggling bit 7 of both bytes.  Returns 0 when
 * the input is exhausted and 2 when the two-byte sequence has no mapping.
 *
 * NOTE(review): REQUIRE_INBUF/REQUIRE_OUTBUF presumably return early when
 * the respective buffer is too short -- confirm in cjkcodecs.h.
 */
DECODER(gb2312)
{
while (inleft > 0) {
unsigned char c = **inbuf;
REQUIRE_OUTBUF(1)
/* Single-byte (ASCII) case. */
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
/* On a table hit the result is stored directly into the output slot. */
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
}
else return 2;
}
return 0;
}
/*
* GBK codec
*/
/*
 * GBK encoder.
 *
 * Characters below 0x80 are written as one byte.  Everything else goes
 * through GBK_ENCODE; the MSB of the resulting table code tells whether the
 * trail byte is emitted as-is (GBK extension) or with bit 7 set (GB2312
 * area).  Returns 0 when the input is exhausted, 1 when the character has
 * no GBK code.
 *
 * NOTE(review): helper macros (WRITE1, OUTn, NEXT, REQUIRE_OUTBUF,
 * UCS4INVALID) are defined outside this section and may return early --
 * confirm in cjkcodecs.h.
 */
ENCODER(gbk)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
/* ASCII range: single byte. */
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* GBK_ENCODE leaves an if/else chain open; this `else` closes it. */
GBK_ENCODE(c, code)
else return 1;
/* The lead byte always gets bit 7 set. */
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2)
}
return 0;
}
/*
 * GBK decoder.
 *
 * Bytes below 0x80 are copied through; any other byte starts a two-byte
 * sequence decoded by GBK_DECODE straight into the output slot.  Returns 0
 * when the input is exhausted and 2 when the pair has no mapping.
 *
 * NOTE(review): REQUIRE_INBUF/REQUIRE_OUTBUF presumably return early on
 * short buffers -- confirm in cjkcodecs.h.
 */
DECODER(gbk)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
/* Single-byte (ASCII) case. */
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
/* GBK_DECODE leaves an if/else chain open; this `else` closes it. */
GBK_DECODE(c, IN2, **outbuf)
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
* GB18030 codec
*/
/*
 * GB18030 encoder.
 *
 * Per input character:
 *   - below 0x80: one byte;
 *   - above 0x10FFFF: error (return 2 on 16-bit Py_UNICODE builds, else 1);
 *   - 0x10000 and above: four bytes computed arithmetically from
 *     (c - 0x10000), first byte based at 0x90;
 *   - otherwise: GBK_ENCODE, then the gb18030ext table (two bytes), and as
 *     a last resort a linear search of gb18030_to_unibmp_ranges yielding a
 *     four-byte form with first byte based at 0x81.
 * Returns 0 when the input is exhausted, 1 when no encoding exists.
 *
 * NOTE(review): DECODE_SURROGATE presumably folds a UTF-16 surrogate pair
 * into `c` on narrow builds; it and the other helper macros are defined
 * outside this section -- confirm in cjkcodecs.h.
 */
ENCODER(gb18030)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code;
/* ASCII range: single byte. */
if (c < 0x80) {
WRITE1(c)
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
if (c > 0x10FFFF)
#if Py_UNICODE_SIZE == 2
return 2; /* surrogates pair */
#else
return 1;
#endif
else if (c >= 0x10000) {
/* Supplementary planes: the four bytes are the digits of tc in the
 * mixed radix (10, 126, 10), produced least significant first. */
ucs4_t tc = c - 0x10000;
REQUIRE_OUTBUF(4)
OUT4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT1((unsigned char)(tc + 0x90))
#if Py_UNICODE_SIZE == 2
NEXT(2, 4) /* surrogates pair */
#else
NEXT(1, 4)
#endif
continue;
}
REQUIRE_OUTBUF(2)
/* Two-byte candidates first; both macros leave an if/else chain open. */
GBK_ENCODE(c, code)
else TRYMAP_ENC(gb18030ext, code, c);
else {
/* Fall back to the four-byte BMP ranges.  The table is terminated by
 * an entry whose `first` is 0. */
const struct _gb18030_to_unibmp_ranges *utrrange;
REQUIRE_OUTBUF(4)
for (utrrange = gb18030_to_unibmp_ranges;
utrrange->first != 0;
utrrange++)
if (utrrange->first <= c &&
c <= utrrange->last) {
Py_UNICODE tc;
/* Linear index of c within the four-byte code space. */
tc = c - utrrange->first +
utrrange->base;
OUT4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT1((unsigned char)tc + 0x81)
NEXT(1, 4)
break;
}
/* Reaching the terminator means no range contained c. */
if (utrrange->first == 0)
return 1;
continue;
}
/* Two-byte output: lead byte always has bit 7 set. */
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2)
}
return 0;
}
/*
 * GB18030 decoder.
 *
 * Bytes below 0x80 are copied through.  If the second byte is an ASCII
 * digit (0x30..0x39) the sequence is a four-byte code: the four bytes are
 * validated, turned into a linear index (radix 10/126/10) and mapped either
 * through gb18030_to_unibmp_ranges (BMP) or arithmetically onto
 * U+10000..U+10FFFF.  Otherwise the two-byte pair goes through GBK_DECODE
 * and then the gb18030ext table.
 *
 * Returns 0 when the input is exhausted, 4 for an invalid four-byte
 * sequence and 2 for an unmapped two-byte sequence.
 *
 * The first and third bytes of a four-byte sequence are required to lie in
 * 0x81..0xFE.  Without the upper bound a third byte of 0xFF produced an
 * index digit of 126, which silently aliased the sequence with the next
 * second-byte value; the encoder in this file only ever emits third bytes
 * in 0x81..0xFE (tc % 126 + 0x81).
 *
 * NOTE(review): helper macros (INn, OUT1, NEXT*, REQUIRE_*, WRITEUCS4,
 * TRYMAP_DEC) are defined outside this section and may return early --
 * confirm in cjkcodecs.h.
 */
DECODER(gb18030)
{
    while (inleft > 0) {
        unsigned char c = IN1, c2;

        REQUIRE_OUTBUF(1)

        /* Single-byte (ASCII) case. */
        if (c < 0x80) {
            OUT1(c)
            NEXT(1, 1)
            continue;
        }

        REQUIRE_INBUF(2)

        c2 = IN2;
        if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
            const struct _gb18030_to_unibmp_ranges *utr;
            unsigned char c3, c4;
            ucs4_t lseq;

            REQUIRE_INBUF(4)
            c3 = IN3;
            c4 = IN4;
            /* Validate every byte against its full legal range. */
            if (c < 0x81 || c > 0xfe ||
                c3 < 0x81 || c3 > 0xfe ||
                c4 < 0x30 || c4 > 0x39)
                return 4;
            /* Reduce each byte to its zero-based digit. */
            c -= 0x81;  c2 -= 0x30;
            c3 -= 0x81; c4 -= 0x30;

            if (c < 4) { /* U+0080 - U+FFFF */
                lseq = ((ucs4_t)c * 10 + c2) * 1260 +
                    (ucs4_t)c3 * 10 + c4;
                if (lseq < 39420) {
                    /* Find the last range whose base is <= lseq. */
                    for (utr = gb18030_to_unibmp_ranges;
                         lseq >= (utr + 1)->base;
                         utr++) ;
                    OUT1(utr->first - utr->base + lseq)
                    NEXT(4, 1)
                    continue;
                }
            }
            else if (c >= 15) { /* U+10000 - U+10FFFF */
                lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
                    * 1260 + (ucs4_t)c3 * 10 + c4;
                if (lseq <= 0x10FFFF) {
                    WRITEUCS4(lseq);
                    NEXT_IN(4)
                    continue;
                }
            }
            /* Index outside both mapped areas. */
            return 4;
        }

        /* Two-byte sequence; both macros leave an if/else chain open. */
        GBK_DECODE(c, c2, **outbuf)
        else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
        else return 2;

        NEXT(2, 1)
    }

    return 0;
}
/*
* HZ codec
*/
/* HZ encoder initialisation: start with state->i == 0.  The encoder below
 * emits "~{" when it sets i to 1 and "~}" when it sets it back to 0, so 0
 * is the ASCII mode and 1 the GB mode.  Always returns 0. */
ENCODER_INIT(hz)
{
state->i = 0;
return 0;
}
/* HZ encoder reset: if the encoder is still in GB mode (state->i != 0),
 * write the closing "~}" escape and return to ASCII mode.  Returns 0.
 * NOTE(review): WRITE2 presumably checks the output space itself and may
 * return early -- confirm in cjkcodecs.h. */
ENCODER_RESET(hz)
{
if (state->i != 0) {
WRITE2('~', '}')
state->i = 0;
NEXT_OUT(2)
}
return 0;
}
/*
 * HZ encoder.
 *
 * state->i tracks the current mode (0 = ASCII, 1 = GB).  Characters below
 * 0x80 are written directly, preceded by "~}" when leaving GB mode.  Other
 * characters are looked up in gbcommon and written as two 7-bit bytes,
 * preceded by "~{" when entering GB mode.  Returns 0 when the input is
 * exhausted, 1 when the character has no GB2312 code.
 *
 * NOTE(review): WRITEn presumably verify output space and may return
 * early; they are defined outside this section -- confirm in cjkcodecs.h.
 */
ENCODER(hz)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
if (state->i == 0) {
/* Already in ASCII mode. */
WRITE1((unsigned char)c)
NEXT(1, 1)
}
else {
/* Leave GB mode first, then write the character. */
WRITE3('~', '}', (unsigned char)c)
NEXT(1, 3)
state->i = 0;
}
continue;
}
UCS4INVALID(c)
/* The `else` below belongs to the `if` inside TRYMAP_ENC. */
TRYMAP_ENC(gbcommon, code, c);
else return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;
if (state->i == 0) {
/* Enter GB mode and write the two code bytes unmodified. */
WRITE4('~', '{', code >> 8, code & 0xff)
NEXT(1, 4)
state->i = 1;
}
else {
WRITE2(code >> 8, code & 0xff)
NEXT(1, 2)
}
}
return 0;
}
/* HZ decoder initialisation: start in ASCII mode (state->i == 0; the
 * decoder sets it to 1 on "~{" and back to 0 on "~}").  Always returns 0. */
DECODER_INIT(hz)
{
state->i = 0;
return 0;
}
/* HZ decoder reset: drop back to ASCII mode (state->i == 0).  Produces no
 * output and always returns 0. */
DECODER_RESET(hz)
{
state->i = 0;
return 0;
}
DECODER(hz)
{
while (inleft > 0) {
unsigned char c = IN1;
if (c == '~') {
unsigned char c2 = IN2;
REQUIRE_INBUF(2)
if (c2 == '~') {
WRITE1('~')
NEXT(2, 1)
continue;
}
else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */
else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */
else if (c2 == '\n')
; /* line-continuation */
else
return 2;
NEXT(2, 0);
continue;
}
if (c & 0x80)
return 1;
if (state->i == 0) { /* ASCII mode */
WRITE1(c)
NEXT(1, 1)
}
else { /* GB mode */
REQUIRE_INBUF(2)
REQUIRE_OUTBUF(1)
TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
NEXT(2, 1)
}
else
return 2;
}
}
return 0;
}
/* Mapping tables published by this module.
 * NOTE(review): judging by the macro names, DECONLY / ENCONLY / ENCDEC
 * register a decode-only, encode-only or two-way table respectively --
 * confirm in cjkcodecs.h. */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(gb2312)
MAPPING_DECONLY(gbkext)
MAPPING_ENCONLY(gbcommon)
MAPPING_ENCDEC(gb18030ext)
END_MAPPINGS_LIST
/* Codecs published by this module; hz is the only one that defines
 * init/reset functions and keeps mode state between calls. */
BEGIN_CODECS_LIST
CODEC_STATELESS(gb2312)
CODEC_STATELESS(gbk)
CODEC_STATELESS(gb18030)
CODEC_STATEFUL(hz)
END_CODECS_LIST
/* NOTE(review): presumably expands to the module init for _codecs_cn. */
I_AM_A_MODULE_FOR(cn)

View File

@ -0,0 +1,184 @@
/*
* _codecs_hk.c: Codecs collection for encodings from Hong Kong
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#define USING_IMPORTED_MAPS
#include "cjkcodecs.h"
#include "mappings_hk.h"
/*
* BIG5HKSCS codec
*/
/* Big5 tables borrowed from the `tw` codec module; filled in on first use. */
static const encode_map *big5_encmap = NULL;
static const decode_map *big5_decmap = NULL;

/*
 * One-time initialisation for the big5hkscs codec: import the big5
 * encode/decode maps from the tw module unless that has already succeeded.
 * Returns 0 on success and -1 if the import fails (in which case the next
 * call will try again).
 */
CODEC_INIT(big5hkscs)
{
    static int initialized = 0;

    if (!initialized) {
        if (IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
            return -1;
        initialized = 1;
    }
    return 0;
}
/*
* There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
* U+00CA U+0304 -> 8862 (U+00CA alone is mapped to 8866)
* U+00CA U+030C -> 8864
* U+00EA U+0304 -> 88a3 (U+00EA alone is mapped to 88a7)
* U+00EA U+030C -> 88a5
* These are handled by not mapping tables but a hand-written code.
*/
/* Codes for the four two-character sequences described above, indexed by
 * ((base >> 4) | (combining >> 3)) & 3 in the encoder below. */
static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
/*
 * Big5-HKSCS encoder.
 *
 * Characters below 0x80 are written as one byte.  BMP characters are tried
 * against big5hkscs_bmp (with special handling for the MULTIC marker, see
 * below) and then plain big5; plane-2 characters (0x20000..0x2FFFF) are
 * tried against big5hkscs_nonbmp using their low 16 bits.  All results are
 * written as two bytes.
 *
 * Returns 0 when the input is exhausted, 1 or `insize` when the character
 * cannot be encoded, and MBERR_TOOFEW when a possible pair is cut off at
 * the end of the input and MBENC_FLUSH is not set.
 *
 * NOTE(review): DECODE_SURROGATE/GET_INSIZE presumably handle UTF-16
 * surrogate pairs on narrow builds; they and the other helper macros are
 * defined outside this section -- confirm in cjkcodecs.h.
 */
ENCODER(big5hkscs)
{
while (inleft > 0) {
ucs4_t c = **inbuf;
DBCHAR code;
Py_ssize_t insize;
/* ASCII range: single byte. */
if (c < 0x80) {
REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c;
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
insize = GET_INSIZE(c);
REQUIRE_OUTBUF(2)
if (c < 0x10000) {
TRYMAP_ENC(big5hkscs_bmp, code, c) {
/* MULTIC marks a base character that may combine with the next
 * input character into one code. */
if (code == MULTIC) {
/* c is U+00CA or U+00EA and the next character is U+0304 or
 * U+030C: encode the pair as one code and consume both. */
if (inleft >= 2 &&
((c & 0xffdf) == 0x00ca) &&
(((*inbuf)[1] & 0xfff7) == 0x0304)) {
code = big5hkscs_pairenc_table[
((c >> 4) |
((*inbuf)[1] >> 3)) & 3];
insize = 2;
}
/* No following character yet and more input may come. */
else if (inleft < 2 &&
!(flags & MBENC_FLUSH))
return MBERR_TOOFEW;
else {
/* Stand-alone base character. */
if (c == 0xca)
code = 0x8866;
else /* c == 0xea */
code = 0x88a7;
}
}
}
else TRYMAP_ENC(big5, code, c);
else return 1;
}
else if (c < 0x20000)
return insize;
else if (c < 0x30000) {
/* Plane 2: the table is keyed by the low 16 bits. */
TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
else return insize;
}
else
return insize;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(insize, 2)
}
return 0;
}
/* BH2S(c1, c2): linear index of the byte pair (c1, c2) in a grid whose
 * rows start at lead byte 0x87 and whose columns span trail bytes
 * 0x40..0xfe. */
#define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
/*
 * Big5-HKSCS decoder.
 *
 * Bytes below 0x80 are copied through.  For two-byte sequences:
 *   1. unless the pair falls in the range excluded by the test below
 *      (lead byte 0xc6..0xc8, except 0xc6 with a trail byte below 0xa1),
 *      the plain big5 table is tried first;
 *   2. then the big5hkscs table; a per-code hint bitmap decides whether
 *      the 16-bit table value is a BMP character or must be OR-ed with
 *      0x20000;
 *   3. finally the four codes that decode to two Unicode characters.
 * Returns 0 when the input is exhausted, 2 for an unmapped pair and
 * MBERR_INTERNAL if a big5hkscs hit lies outside all hint bitmaps.
 *
 * NOTE(review): helper macros (INn, OUT1, WRITE2, WRITEUCS4, NEXT*,
 * REQUIRE_*, TRYMAP_DEC) are defined outside this section and may return
 * early -- confirm in cjkcodecs.h.
 */
DECODER(big5hkscs)
{
while (inleft > 0) {
unsigned char c = IN1;
ucs4_t decoded;
REQUIRE_OUTBUF(1)
/* Single-byte (ASCII) case. */
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
continue;
}
}
TRYMAP_DEC(big5hkscs, decoded, c, IN2)
{
int s = BH2S(c, IN2);
const unsigned char *hintbase;
assert(0x87 <= c && c <= 0xfe);
assert(0x40 <= IN2 && IN2 <= 0xfe);
/* Pick the hint bitmap covering this code and rebase s to it. */
if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
hintbase = big5hkscs_phint_0;
s -= BH2S(0x87, 0x40);
}
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
hintbase = big5hkscs_phint_12130;
s -= BH2S(0xc6, 0xa1);
}
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
hintbase = big5hkscs_phint_21924;
s -= BH2S(0xf9, 0xd6);
}
else
return MBERR_INTERNAL;
/* Bit s set: the character lives in plane 2. */
if (hintbase[s >> 3] & (1 << (s & 7))) {
WRITEUCS4(decoded | 0x20000)
NEXT_IN(2)
}
else {
OUT1(decoded)
NEXT(2, 1)
}
continue;
}
/* Codes that expand to a base letter plus a combining mark. */
switch ((c << 8) | IN2) {
case 0x8862: WRITE2(0x00ca, 0x0304); break;
case 0x8864: WRITE2(0x00ca, 0x030c); break;
case 0x88a3: WRITE2(0x00ea, 0x0304); break;
case 0x88a5: WRITE2(0x00ea, 0x030c); break;
default: return 2;
}
NEXT(2, 2) /* all decoded code points are pairs, above. */
}
return 0;
}
/* Mapping tables published by this module.
 * NOTE(review): judging by the macro names, DECONLY / ENCONLY register a
 * decode-only or encode-only table -- confirm in cjkcodecs.h. */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(big5hkscs)
MAPPING_ENCONLY(big5hkscs_bmp)
MAPPING_ENCONLY(big5hkscs_nonbmp)
END_MAPPINGS_LIST
/* The single codec of this module; the _WINIT variant presumably wires in
 * the CODEC_INIT(big5hkscs) function defined above -- confirm. */
BEGIN_CODECS_LIST
CODEC_STATELESS_WINIT(big5hkscs)
END_CODECS_LIST
/* NOTE(review): presumably expands to the module init for _codecs_hk. */
I_AM_A_MODULE_FOR(hk)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,731 @@
/*
* _codecs_jp.c: Codecs collection for Japanese encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#define USING_BINARY_PAIR_SEARCH
#define EMPBASE 0x20000
#include "cjkcodecs.h"
#include "mappings_jp.h"
#include "mappings_jisx0213_pair.h"
#include "alg_jisx0201.h"
#include "emu_jisx0213_2000.h"
/*
* CP932 codec
*/
/*
 * CP932 encoder.
 *
 * Single-byte output:
 *   - U+0000..U+0080 are written unchanged (0x80 included);
 *   - U+FF61..U+FF9F (half-width katakana) map to 0xA1..0xDF;
 *   - U+F8F0 maps to 0xA0 and U+F8F1..U+F8F3 map to 0xFD..0xFF
 *     (Windows compatibility).
 * Double-byte output:
 *   - cp932ext table hits are written as stored;
 *   - jisxcommon hits (JIS X 0208 only) are converted from row/cell form
 *     to Shift_JIS bytes;
 *   - U+E000..U+E757 (user-defined area) map arithmetically onto lead
 *     bytes starting at 0xF0, 188 characters per lead byte.
 * Returns 0 when the input is exhausted and 1 when the character cannot be
 * encoded.
 *
 * The Windows-compatibility branch subtracts 0xF8F1, mirroring the decoder
 * (0xf8f1 - 0xfd + c).  The previous constant 0xFEF1 only produced the
 * right byte because the result was truncated to eight bits.
 *
 * NOTE(review): helper macros (IN1, WRITE1, OUTn, NEXT, REQUIRE_OUTBUF,
 * UCS4INVALID, TRYMAP_ENC) are defined outside this section and may return
 * early -- confirm in cjkcodecs.h.
 */
ENCODER(cp932)
{
    while (inleft > 0) {
        Py_UNICODE c = IN1;
        DBCHAR code;
        unsigned char c1, c2;

        if (c <= 0x80) {
            WRITE1((unsigned char)c)
            NEXT(1, 1)
            continue;
        }
        else if (c >= 0xff61 && c <= 0xff9f) {
            /* Half-width katakana. */
            WRITE1(c - 0xfec0)
            NEXT(1, 1)
            continue;
        }
        else if (c >= 0xf8f0 && c <= 0xf8f3) {
            /* Windows compatibility */
            REQUIRE_OUTBUF(1)
            if (c == 0xf8f0)
                OUT1(0xa0)
            else
                OUT1(c - 0xf8f1 + 0xfd)
            NEXT(1, 1)
            continue;
        }
        UCS4INVALID(c)
        REQUIRE_OUTBUF(2)

        TRYMAP_ENC(cp932ext, code, c) {
            OUT1(code >> 8)
            OUT2(code & 0xff)
        }
        else TRYMAP_ENC(jisxcommon, code, c) {
            if (code & 0x8000) /* MSB set: JIS X 0212 */
                return 1;

            /* JIS X 0208 */
            c1 = code >> 8;
            c2 = code & 0xff;
            /* Two JIS rows share one Shift_JIS lead byte; odd rows use
             * the upper half of the trail-byte range. */
            c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
            c1 = (c1 - 0x21) >> 1;
            OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
            OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
        }
        else if (c >= 0xe000 && c < 0xe758) {
            /* User-defined area */
            c1 = (Py_UNICODE)(c - 0xe000) / 188;
            c2 = (Py_UNICODE)(c - 0xe000) % 188;
            OUT1(c1 + 0xf0)
            OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
        }
        else
            return 1;

        NEXT(1, 2)
    }

    return 0;
}
/*
 * CP932 decoder.
 *
 * Single bytes: 0x00..0x80 are copied through, 0xA0 becomes U+F8F0,
 * 0xA1..0xDF become U+FF61..U+FF9F, and 0xFD..0xFF become
 * U+F8F1..U+F8F3.  Two-byte sequences are tried against cp932ext, then
 * converted from Shift_JIS to JIS X 0208 row/cell form for lead bytes
 * 0x81..0x9F / 0xE0..0xEA, then mapped arithmetically into U+E000.. for
 * lead bytes 0xF0..0xF9.  Returns 0 when the input is exhausted and 2 for
 * an invalid or unmapped two-byte sequence.
 *
 * NOTE(review): helper macros (INn, OUT1, NEXT, REQUIRE_*, TRYMAP_DEC) are
 * defined outside this section and may return early -- confirm in
 * cjkcodecs.h.
 */
DECODER(cp932)
{
while (inleft > 0) {
unsigned char c = IN1, c2;
REQUIRE_OUTBUF(1)
if (c <= 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
else if (c >= 0xa0 && c <= 0xdf) {
if (c == 0xa0)
OUT1(0xf8f0) /* Windows compatibility */
else
OUT1(0xfec0 + c) /* half-width katakana */
NEXT(1, 1)
continue;
}
else if (c >= 0xfd/* && c <= 0xff*/) {
/* Windows compatibility */
OUT1(0xf8f1 - 0xfd + c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
c2 = IN2;
/* The `else` chain below hangs off the `if` inside TRYMAP_DEC. */
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
/* Reject trail bytes outside 0x40..0x7e and 0x80..0xfc. */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 2;
/* Shift_JIS -> JIS X 0208 row (c) and cell (c2). */
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else return 2;
}
else if (c >= 0xf0 && c <= 0xf9) {
/* User-defined area: 188 characters per lead byte from U+E000. */
if ((c2 >= 0x40 && c2 <= 0x7e) ||
(c2 >= 0x80 && c2 <= 0xfc))
OUT1(0xe000 + 188 * (c - 0xf0) +
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
else
return 2;
}
else
return 2;
NEXT(2, 1)
}
return 0;
}
/*
* EUC-JIS-2004 codec
*/
/*
 * EUC-JIS-2004 encoder.
 *
 * Characters below 0x80 are written as one byte.  BMP characters are tried
 * in order against: the JIS X 0213:2000 emulation hook, jisx0213_bmp
 * (including two-character pairs flagged by MULTIC), jisxcommon, the
 * half-width katakana range (written as 0x8E + byte), and the two
 * full-width special cases U+FF3C and U+FF5E.  Characters in the plane
 * selected by EMPBASE are tried against jisx0213_emp using their low 16
 * bits.  A resulting code with the MSB set is written as three bytes
 * (0x8F prefix), otherwise as two bytes with bit 7 set on each.
 *
 * Returns 0 when the input is exhausted, 1 or `insize` when the character
 * cannot be encoded, and MBERR_TOOFEW when a possible pair is cut off and
 * MBENC_FLUSH is not set.
 *
 * NOTE(review): DECODE_SURROGATE, GET_INSIZE, the EMULATE_JISX0213_2000_*
 * hooks, find_pairencmap and the other helper macros are defined outside
 * this section -- confirm their exact behaviour there.
 */
ENCODER(euc_jis_2004)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code;
Py_ssize_t insize;
/* ASCII range: single byte. */
if (c < 0x80) {
WRITE1(c)
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
insize = GET_INSIZE(c);
if (c <= 0xFFFF) {
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else TRYMAP_ENC(jisx0213_bmp, code, c) {
/* MULTIC: c may combine with the following character. */
if (code == MULTIC) {
if (inleft < 2) {
if (flags & MBENC_FLUSH) {
/* End of input: encode c on its own (second key 0). */
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
return MBERR_TOOFEW;
}
else {
/* Try the pair (c, next) first, then c alone. */
code = find_pairencmap(
(ucs2_t)c, (*inbuf)[1],
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
insize = 2;
}
}
}
else TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITE2(0x8e, c - 0xfec0)
NEXT(1, 2)
continue;
}
else if (c == 0xff3c)
/* F/W REVERSE SOLIDUS (see NOTES) */
code = 0x2140;
else if (c == 0xff5e)
/* F/W TILDE (see NOTES) */
code = 0x2232;
else
return 1;
}
else if (c >> 16 == EMPBASE >> 16) {
/* Same plane as EMPBASE: look up by the low 16 bits. */
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
else return insize;
}
else
return insize;
if (code & 0x8000) {
/* Codeset 2 */
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
NEXT(insize, 3)
} else {
/* Codeset 1 */
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
NEXT(insize, 2)
}
}
return 0;
}
/*
 * EUC-JIS-2004 decoder.
 *
 * Dispatches on the first byte:
 *   - below 0x80: copied through;
 *   - 0x8E: the next byte (0xA1..0xDF) is half-width katakana;
 *   - 0x8F: three-byte sequence, tried against the plane-2 tables
 *     (BMP then EMP) and finally JIS X 0212;
 *   - otherwise: two-byte sequence, tried against the plane-1 tables,
 *     including the table of codes that decode to two characters.
 * Returns 0 when the input is exhausted, and 2 or 3 (the sequence length)
 * when the sequence is invalid or unmapped.
 *
 * NOTE(review): the EMULATE_JISX0213_2000_* hooks and the helper macros
 * (INn, OUT1, WRITE2, WRITEUCS4, NEXT*, REQUIRE_*, TRYMAP_DEC) are defined
 * outside this section -- confirm their exact behaviour there.
 */
DECODER(euc_jis_2004)
{
while (inleft > 0) {
unsigned char c = IN1;
ucs4_t code;
REQUIRE_OUTBUF(1)
/* Single-byte (ASCII) case. */
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)
NEXT(2, 1)
}
else
return 2;
}
else if (c == 0x8f) {
unsigned char c2, c3;
REQUIRE_INBUF(3)
/* Toggle bit 7 to obtain the 7-bit row/cell values. */
c2 = IN2 ^ 0x80;
c3 = IN3 ^ 0x80;
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
/* Non-BMP result: combine with the plane base. */
WRITEUCS4(EMPBASE | code)
NEXT_IN(3)
continue;
}
else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
else return 3;
NEXT(3, 1)
}
else {
unsigned char c2;
REQUIRE_INBUF(2)
c ^= 0x80;
c2 = IN2 ^ 0x80;
/* JIS X 0213 Plane 1 */
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
/* 0x2140 and 0x2232 decode to the full-width forms U+FF3C / U+FF5E. */
else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
WRITEUCS4(EMPBASE | code)
NEXT_IN(2)
continue;
}
else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
/* One code that decodes to two characters (high/low 16 bits). */
WRITE2(code >> 16, code & 0xffff)
NEXT(2, 2)
continue;
}
else return 2;
NEXT(2, 1)
}
}
return 0;
}
/*
* EUC-JP codec
*/
/*
 * EUC-JP encoder.
 *
 * Characters below 0x80 are written as one byte.  Others are looked up in
 * jisxcommon; half-width katakana are written as 0x8E + byte.  Unless
 * STRICT_BUILD is defined, three compatibility mappings apply: U+FF3C ->
 * code 0x2140, U+00A5 -> byte 0x5C and U+203E -> byte 0x7E.  A table code
 * with the MSB set (JIS X 0212) is written as three bytes with a 0x8F
 * prefix, otherwise as two bytes with bit 7 set on each.  Returns 0 when
 * the input is exhausted and 1 when the character cannot be encoded.
 *
 * NOTE(review): helper macros (IN1, WRITEn, NEXT, UCS4INVALID,
 * TRYMAP_ENC) are defined outside this section and may return early --
 * confirm in cjkcodecs.h.
 */
ENCODER(euc_jp)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
/* ASCII range: single byte. */
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
/* The `else` chain below hangs off the `if` inside TRYMAP_ENC. */
TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITE2(0x8e, c - 0xfec0)
NEXT(1, 2)
continue;
}
#ifndef STRICT_BUILD
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
else if (c == 0xa5) { /* YEN SIGN */
WRITE1(0x5c);
NEXT(1, 1)
continue;
} else if (c == 0x203e) { /* OVERLINE */
WRITE1(0x7e);
NEXT(1, 1)
continue;
}
#endif
else
return 1;
if (code & 0x8000) {
/* JIS X 0212 */
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
NEXT(1, 3)
} else {
/* JIS X 0208 */
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
NEXT(1, 2)
}
}
return 0;
}
/*
 * EUC-JP decoder.
 *
 * Dispatches on the first byte: below 0x80 is copied through; 0x8E
 * introduces a half-width katakana byte (0xA1..0xDF); 0x8F introduces a
 * three-byte JIS X 0212 sequence; anything else starts a two-byte
 * JIS X 0208 sequence.  Unless STRICT_BUILD is defined, A1 C0 decodes to
 * U+FF3C.  Returns 0 when the input is exhausted, and 2 or 3 (the sequence
 * length) when the sequence is invalid or unmapped.
 *
 * NOTE(review): helper macros (INn, OUT1, NEXT, REQUIRE_*, TRYMAP_DEC) are
 * defined outside this section and may return early -- confirm in
 * cjkcodecs.h.
 */
DECODER(euc_jp)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
/* Single-byte (ASCII) case. */
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)
NEXT(2, 1)
}
else
return 2;
}
else if (c == 0x8f) {
unsigned char c2, c3;
REQUIRE_INBUF(3)
c2 = IN2;
c3 = IN3;
/* JIS X 0212 */
TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
NEXT(3, 1)
}
else
return 3;
}
else {
unsigned char c2;
REQUIRE_INBUF(2)
c2 = IN2;
/* JIS X 0208 */
#ifndef STRICT_BUILD
if (c == 0xa1 && c2 == 0xc0)
/* FULL-WIDTH REVERSE SOLIDUS */
**outbuf = 0xff3c;
else
#endif
/* With STRICT_BUILD the lookup below is unconditional. */
TRYMAP_DEC(jisx0208, **outbuf,
c ^ 0x80, c2 ^ 0x80) ;
else return 2;
NEXT(2, 1)
}
}
return 0;
}
/*
* SHIFT_JIS codec
*/
/*
 * Shift_JIS encoder.
 *
 * First tries the single-byte mappings: either strict JIS X 0201 Roman
 * (STRICT_BUILD) or plain ASCII plus U+00A5 -> 0x5C and U+203E -> 0x7E,
 * followed by half-width katakana.  If none applies `code` is set to
 * NOCHAR and the character is looked up in jisxcommon (JIS X 0208 only;
 * unless STRICT_BUILD, U+FF3C additionally maps to 0x2140).  The JIS
 * row/cell code is then converted to Shift_JIS lead/trail bytes.
 * Returns 0 when the input is exhausted and 1 when the character cannot
 * be encoded.
 *
 * NOTE(review): the JISX0201_* macros, UCS4INVALID and the other helpers
 * expand to chained if/else fragments defined outside this function; the
 * statement order here depends on those expansions.
 */
ENCODER(shift_jis)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
unsigned char c1, c2;
/* One continuous if/else chain assembled from macros. */
#ifdef STRICT_BUILD
JISX0201_R_ENCODE(c, code)
#else
if (c < 0x80) code = c;
else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
else if (c == 0x203e) code = 0x7e; /* OVERLINE */
#endif
else JISX0201_K_ENCODE(c, code)
else UCS4INVALID(c)
else code = NOCHAR;
/* Single-byte result (ASCII area or half-width katakana). */
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
REQUIRE_OUTBUF(1)
OUT1((unsigned char)code)
NEXT(1, 1)
continue;
}
REQUIRE_OUTBUF(2)
if (code == NOCHAR) {
TRYMAP_ENC(jisxcommon, code, c);
#ifndef STRICT_BUILD
else if (c == 0xff3c)
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
#endif
else
return 1;
if (code & 0x8000) /* MSB set: JIS X 0212 */
return 1;
}
/* JIS row/cell -> Shift_JIS: two rows share one lead byte, odd rows
 * use the upper half of the trail-byte range. */
c1 = code >> 8;
c2 = code & 0xff;
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
c1 = (c1 - 0x21) >> 1;
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
NEXT(1, 2)
}
return 0;
}
/*
 * Shift_JIS decoder.
 *
 * Single bytes are decoded either via strict JIS X 0201 Roman
 * (STRICT_BUILD) or as plain ASCII, then as half-width katakana; these
 * paths fall through to the final NEXT(1, 1).  Lead bytes 0x81..0x9F and
 * 0xE0..0xEA start a two-byte sequence that is converted to JIS X 0208
 * row/cell form and looked up (unless STRICT_BUILD, 0x2140 decodes to
 * U+FF3C).  Returns 0 when the input is exhausted and 2 for an invalid or
 * unmapped sequence.
 *
 * NOTE(review): the JISX0201_* macros and other helpers expand to chained
 * if/else fragments defined outside this function; the statement order
 * here depends on those expansions.
 */
DECODER(shift_jis)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
/* One continuous if/else chain assembled from macros. */
#ifdef STRICT_BUILD
JISX0201_R_DECODE(c, **outbuf)
#else
if (c < 0x80) **outbuf = c;
#endif
else JISX0201_K_DECODE(c, **outbuf)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
unsigned char c1, c2;
REQUIRE_INBUF(2)
c2 = IN2;
/* Reject trail bytes outside 0x40..0x7e and 0x80..0xfc. */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 2;
/* Shift_JIS -> JIS X 0208 row (c1) and cell (c2). */
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
#ifndef STRICT_BUILD
if (c1 == 0x21 && c2 == 0x40) {
/* FULL-WIDTH REVERSE SOLIDUS */
OUT1(0xff3c)
NEXT(2, 1)
continue;
}
#endif
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
NEXT(2, 1)
continue;
}
else
return 2;
}
else
return 2;
NEXT(1, 1) /* JIS X 0201 */
}
return 0;
}
/*
* SHIFT_JIS-2004 codec
*/
/*
 * Shift_JIS-2004 encoder.
 *
 * Single-byte JIS X 0201 mappings are tried first.  Otherwise BMP
 * characters go through the JIS X 0213:2000 emulation hook, jisx0213_bmp
 * (including two-character pairs flagged by MULTIC) and jisxcommon
 * (JIS X 0212 codes are rejected); characters in the plane selected by
 * EMPBASE go through jisx0213_emp using their low 16 bits.  The resulting
 * row/cell code is converted to Shift_JIS bytes, with a separate row
 * remapping for codes whose row has bit 7 set (plane 2).
 *
 * Returns 0 when the input is exhausted, 1 or `insize` when the character
 * cannot be encoded, and MBERR_TOOFEW when a possible pair is cut off and
 * MBENC_FLUSH is not set.
 *
 * NOTE(review): JISX0201_ENCODE, DECODE_SURROGATE, GET_INSIZE, the
 * EMULATE_JISX0213_2000_* hooks, find_pairencmap and the other helpers are
 * defined outside this function; several expand to chained if/else
 * fragments, so the statement order here depends on those expansions.
 */
ENCODER(shift_jis_2004)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code = NOCHAR;
int c1, c2;
Py_ssize_t insize;
/* `code` stays NOCHAR unless a single-byte mapping applies. */
JISX0201_ENCODE(c, code)
else DECODE_SURROGATE(c)
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
WRITE1((unsigned char)code)
NEXT(1, 1)
continue;
}
REQUIRE_OUTBUF(2)
insize = GET_INSIZE(c);
if (code == NOCHAR) {
if (c <= 0xffff) {
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else TRYMAP_ENC(jisx0213_bmp, code, c) {
/* MULTIC: c may combine with the following character. */
if (code == MULTIC) {
if (inleft < 2) {
if (flags & MBENC_FLUSH) {
/* End of input: encode c on its own (second key 0). */
code = find_pairencmap
((ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
return MBERR_TOOFEW;
}
else {
/* Try the pair (c, next) first, then c alone. */
code = find_pairencmap(
(ucs2_t)c, IN2,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
insize = 2;
}
}
}
else TRYMAP_ENC(jisxcommon, code, c) {
/* abandon JIS X 0212 codes */
if (code & 0x8000)
return 1;
}
else return 1;
}
else if (c >> 16 == EMPBASE >> 16) {
/* Same plane as EMPBASE: look up by the low 16 bits. */
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
else return insize;
}
else
return insize;
}
/* Split into row (c1) and zero-based cell (c2). */
c1 = code >> 8;
c2 = (code & 0xff) - 0x21;
if (c1 & 0x80) { /* Plane 2 */
/* Remap the plane-2 row onto the Shift_JIS row numbering. */
if (c1 >= 0xee) c1 -= 0x87;
else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
else c1 -= 0x43;
}
else /* Plane 1 */
c1 -= 0x21;
/* Two rows share one lead byte; odd rows use the upper trail range. */
if (c1 & 1) c2 += 0x5e;
c1 >>= 1;
OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
NEXT(insize, 2)
}
return 0;
}
/*
 * Shift_JIS-2004 decoder.
 *
 * Single bytes are decoded via JISX0201_DECODE and fall through to the
 * final NEXT(1, 1).  Lead bytes 0x81..0x9F and 0xE0..0xFC start a
 * two-byte sequence: the bytes are converted to a row index and cell;
 * row indexes below 0x5e are plane 1 (JIS X 0208 / JIS X 0213 plane 1,
 * including codes that decode to two characters), the rest are remapped
 * to plane-2 rows.  Returns 0 when the input is exhausted and 2 for an
 * invalid or unmapped sequence.
 *
 * NOTE(review): JISX0201_DECODE, the EMULATE_JISX0213_2000_* hooks and the
 * other helpers are defined outside this function; several expand to
 * chained if/else fragments, so the statement order here depends on those
 * expansions.
 */
DECODER(shift_jis_2004)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
JISX0201_DECODE(c, **outbuf)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
unsigned char c1, c2;
ucs4_t code;
REQUIRE_INBUF(2)
c2 = IN2;
/* Reject trail bytes outside 0x40..0x7e and 0x80..0xfc. */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 2;
/* Shift_JIS -> zero-based row index (c1) and cell (c2). */
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
if (c1 < 0x5e) { /* Plane 1 */
c1 += 0x21;
/* Each successful branch advances the output itself; the input is
 * advanced once after the chain. */
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
c1, c2)
else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
NEXT_OUT(1)
}
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
c1, c2) {
NEXT_OUT(1)
}
else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
WRITEUCS4(EMPBASE | code)
}
else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
/* One code that decodes to two characters (high/low 16 bits). */
WRITE2(code >> 16, code & 0xffff)
NEXT_OUT(2)
}
else
return 2;
NEXT_IN(2)
}
else { /* Plane 2 */
/* Remap the Shift_JIS row index onto the plane-2 row number. */
if (c1 >= 0x67) c1 += 0x07;
else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
else c1 -= 0x3d;
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
c1, c2)
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
c1, c2) ;
else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
WRITEUCS4(EMPBASE | code)
NEXT_IN(2)
continue;
}
else
return 2;
NEXT(2, 1)
}
continue;
}
else
return 2;
NEXT(1, 1) /* JIS X 0201 */
}
return 0;
}
/* Mapping tables published by this module.
 * NOTE(review): judging by the macro names, DECONLY / ENCONLY / ENCDEC
 * register a decode-only, encode-only or two-way table respectively --
 * confirm in cjkcodecs.h. */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(jisx0208)
MAPPING_DECONLY(jisx0212)
MAPPING_ENCONLY(jisxcommon)
MAPPING_DECONLY(jisx0213_1_bmp)
MAPPING_DECONLY(jisx0213_2_bmp)
MAPPING_ENCONLY(jisx0213_bmp)
MAPPING_DECONLY(jisx0213_1_emp)
MAPPING_DECONLY(jisx0213_2_emp)
MAPPING_ENCONLY(jisx0213_emp)
MAPPING_ENCDEC(jisx0213_pair)
MAPPING_ENCDEC(cp932ext)
END_MAPPINGS_LIST
/* Codecs published by this module.  The two explicit entries reuse the
 * *_2004 encoder/decoder functions under the *_jisx0213 names with a
 * config value of 2000.
 * NOTE(review): that config value is presumably what the
 * EMULATE_JISX0213_2000_* hooks test -- confirm in emu_jisx0213_2000.h. */
BEGIN_CODECS_LIST
CODEC_STATELESS(shift_jis)
CODEC_STATELESS(cp932)
CODEC_STATELESS(euc_jp)
CODEC_STATELESS(shift_jis_2004)
CODEC_STATELESS(euc_jis_2004)
{ "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
{ "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
END_CODECS_LIST
/* NOTE(review): presumably expands to the module init for _codecs_jp. */
I_AM_A_MODULE_FOR(jp)

View File

@ -0,0 +1,452 @@
/*
* _codecs_kr.c: Codecs collection for Korean encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#include "cjkcodecs.h"
#include "mappings_kr.h"
/*
* EUC-KR codec
*/
/* Lead byte used for every jamo in a make-up sequence, and the trail byte
 * of the filler that opens the sequence (and stands for "no final
 * consonant" when decoding). */
#define EUCKR_JAMO_FIRSTBYTE 0xA4
#define EUCKR_JAMO_FILLER 0xD4
/* Trail byte of the initial consonant, indexed by syllable_index / 588. */
static const unsigned char u2cgk_choseong[19] = {
0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
0xbc, 0xbd, 0xbe
};
/* Trail byte of the vowel, indexed by (syllable_index / 28) % 21. */
static const unsigned char u2cgk_jungseong[21] = {
0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
0xcf, 0xd0, 0xd1, 0xd2, 0xd3
};
/* Trail byte of the final consonant, indexed by syllable_index % 28;
 * entry 0 is the filler (no final consonant). */
static const unsigned char u2cgk_jongseong[28] = {
0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
0xbb, 0xbc, 0xbd, 0xbe
};
/*
 * EUC-KR encoder.
 *
 * Characters below 0x80 are written as one byte.  Others are looked up in
 * the cp949 table: codes without the MSB are KS X 1001 characters and are
 * written as two bytes with bit 7 set; codes with the MSB (CP949
 * extension syllables) are instead written as an eight-byte make-up
 * sequence: filler, initial consonant, vowel, final consonant, each
 * prefixed by EUCKR_JAMO_FIRSTBYTE.  Returns 0 when the input is exhausted
 * and 1 when the character has no mapping.
 *
 * NOTE(review): helper macros (IN1, WRITE1, OUTn, NEXT*, REQUIRE_OUTBUF,
 * UCS4INVALID, TRYMAP_ENC) are defined outside this section and may return
 * early -- confirm in cjkcodecs.h.
 */
ENCODER(euc_kr)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
/* ASCII range: single byte. */
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* The `else` below belongs to the `if` inside TRYMAP_ENC. */
TRYMAP_ENC(cp949, code, c);
else return 1;
if ((code & 0x8000) == 0) {
/* KS X 1001 coded character */
OUT1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80)
NEXT(1, 2)
}
else { /* Mapping is found in CP949 extension,
* but we encode it in KS X 1001:1998 Annex 3,
* make-up sequence for EUC-KR. */
REQUIRE_OUTBUF(8)
/* syllable composition precedence */
OUT1(EUCKR_JAMO_FIRSTBYTE)
OUT2(EUCKR_JAMO_FILLER)
/* All code points in CP949 extension are in unicode
* Hangul Syllable area. */
assert(0xac00 <= c && c <= 0xd7a3);
c -= 0xac00;
OUT3(EUCKR_JAMO_FIRSTBYTE)
OUT4(u2cgk_choseong[c / 588])
/* First four bytes done; advance output only and write the rest. */
NEXT_OUT(4)
OUT1(EUCKR_JAMO_FIRSTBYTE)
OUT2(u2cgk_jungseong[(c / 28) % 21])
OUT3(EUCKR_JAMO_FIRSTBYTE)
OUT4(u2cgk_jongseong[c % 28])
NEXT(1, 4)
}
}
return 0;
}
#define NONE 127
static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
0, 1, NONE, 2, NONE, NONE, 3, 4,
5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
6, 7, 8, NONE, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18
};
static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
1, 2, 3, 4, 5, 6, 7, NONE,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, NONE, 18, 19, 20, 21, 22,
NONE, 23, 24, 25, 26, 27
};
/*
 * EUC-KR decoder.
 *
 * Bytes below 0x80 are copied through.  A sequence starting with
 * EUCKR_JAMO_FIRSTBYTE + EUCKR_JAMO_FILLER is an eight-byte make-up
 * sequence whose three jamo are combined into one Hangul syllable
 * (0xAC00 + cho*588 + jung*28 + jong).  Any other two-byte sequence is
 * looked up in the ksx1001 table after toggling bit 7 of both bytes.
 * Returns 0 when the input is exhausted, 8 for a malformed make-up
 * sequence and 2 for an unmapped two-byte sequence.
 *
 * NOTE(review): helper macros (INn, OUT1, NEXT, REQUIRE_*, TRYMAP_DEC) are
 * defined outside this section and may return early -- confirm in
 * cjkcodecs.h.
 */
DECODER(euc_kr)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
/* Single-byte (ASCII) case. */
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
if (c == EUCKR_JAMO_FIRSTBYTE &&
IN2 == EUCKR_JAMO_FILLER) {
/* KS X 1001:1998 Annex 3 make-up sequence */
DBCHAR cho, jung, jong;
REQUIRE_INBUF(8)
/* Every jamo must carry the common lead byte. */
if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
return 8;
/* Initial consonant. */
c = (*inbuf)[3];
if (0xa1 <= c && c <= 0xbe)
cho = cgk2u_choseong[c - 0xa1];
else
cho = NONE;
/* Vowel: contiguous range, no table needed. */
c = (*inbuf)[5];
jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
/* Final consonant; the filler means "none". */
c = (*inbuf)[7];
if (c == EUCKR_JAMO_FILLER)
jong = 0;
else if (0xa1 <= c && c <= 0xbe)
jong = cgk2u_jongseong[c - 0xa1];
else
jong = NONE;
if (cho == NONE || jung == NONE || jong == NONE)
return 8;
OUT1(0xac00 + cho*588 + jung*28 + jong);
NEXT(8, 1)
}
else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
}
else
return 2;
}
return 0;
}
#undef NONE
/*
* CP949 codec
*/
/*
 * CP949 encoder.
 *
 * Characters below 0x80 are written as one byte; others are looked up in
 * the cp949 table and written as two bytes.  The lead byte always gets
 * bit 7 set; the trail byte is written as stored when the code's MSB is
 * set (CP949 extension) and with bit 7 set otherwise (KS X 1001).
 * Returns 0 when the input is exhausted and 1 when the character has no
 * mapping.
 *
 * NOTE(review): helper macros (IN1, WRITE1, OUTn, NEXT, REQUIRE_OUTBUF,
 * UCS4INVALID, TRYMAP_ENC) are defined outside this section and may return
 * early -- confirm in cjkcodecs.h.
 */
ENCODER(cp949)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
/* ASCII range: single byte. */
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* The `else` below belongs to the `if` inside TRYMAP_ENC. */
TRYMAP_ENC(cp949, code, c);
else return 1;
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2(code & 0xFF) /* MSB set: CP949 */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
NEXT(1, 2)
}
return 0;
}
/*
 * CP949 decoder.
 *
 * Bytes below 0x80 are copied through.  Two-byte sequences are tried
 * against the ksx1001 table (with bit 7 of both bytes toggled) and then
 * against cp949ext (bytes as-is).  Returns 0 when the input is exhausted
 * and 2 when the pair has no mapping.
 *
 * NOTE(review): helper macros (INn, OUT1, NEXT, REQUIRE_*, TRYMAP_DEC) are
 * defined outside this section and may return early -- confirm in
 * cjkcodecs.h.
 */
DECODER(cp949)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
/* Single-byte (ASCII) case. */
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
/* The `else` chain hangs off the `if` inside TRYMAP_DEC. */
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
* JOHAB codec
*/
/* Five-bit Johab field values for the three parts of a Hangul syllable.
 * Each array is declared with 32 slots but only the first 19 / 21 / 28 are
 * initialised (the rest are zero); the encoder indexes them with
 * c / 588, (c / 28) % 21 and c % 28 respectively. */
static const unsigned char u2johabidx_choseong[32] = {
0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14,
};
static const unsigned char u2johabidx_jungseong[32] = {
0x03, 0x04, 0x05, 0x06, 0x07,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x1a, 0x1b, 0x1c, 0x1d,
};
static const unsigned char u2johabidx_jongseong[32] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
};
/* Complete Johab codes for U+3131..U+3163, indexed by (c - 0x3131). */
static const DBCHAR u2johabjamo[] = {
0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
0x8741, 0x8761, 0x8781, 0x87a1,
};
/*
 * Johab encoder.
 *
 * Characters below 0x80 are written as one byte.  Hangul syllables
 * (U+AC00..U+D7A3) are composed arithmetically from three 5-bit fields
 * under a set top bit; U+3131..U+3163 come from u2johabjamo; everything
 * else is looked up in the cp949 table and, if its row/cell lies in the
 * accepted ranges, converted arithmetically to Johab bytes.  Returns 0
 * when the input is exhausted and 1 when the character cannot be encoded.
 *
 * NOTE(review): helper macros (IN1, WRITE1, OUTn, NEXT, REQUIRE_OUTBUF,
 * UCS4INVALID, TRYMAP_ENC) are defined outside this section and may return
 * early -- confirm in cjkcodecs.h.
 */
ENCODER(johab)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
/* ASCII range: single byte. */
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
if (c >= 0xac00 && c <= 0xd7a3) {
/* Hangul syllable: bit 15 | initial<<10 | vowel<<5 | final. */
c -= 0xac00;
code = 0x8000 |
(u2johabidx_choseong[c / 588] << 10) |
(u2johabidx_jungseong[(c / 28) % 21] << 5) |
u2johabidx_jongseong[c % 28];
}
else if (c >= 0x3131 && c <= 0x3163)
code = u2johabjamo[c - 0x3131];
else TRYMAP_ENC(cp949, code, c) {
unsigned char c1, c2, t2;
unsigned short t1;
/* Syllables were handled above, so no extension code (MSB set) is
 * expected here. */
assert((code & 0x8000) == 0);
c1 = code >> 8;
c2 = code & 0xff;
if (((c1 >= 0x21 && c1 <= 0x2c) ||
(c1 >= 0x4a && c1 <= 0x7d)) &&
(c2 >= 0x21 && c2 <= 0x7e)) {
/* Row/cell -> Johab: t1 is twice the lead byte plus a row-parity
 * bit; odd rows use the upper half of the trail-byte range. */
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
(c1 - 0x21 + 0x197));
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
OUT1(t1 >> 1)
OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
NEXT(1, 2)
continue;
}
else
return 1;
}
else
return 1;
/* Syllable or jamo: write the 16-bit code big-endian. */
OUT1(code >> 8)
OUT2(code & 0xff)
NEXT(1, 2)
}
return 0;
}
/* Table markers: FILL = the field holds the "fill" (empty) value,
 * NONE = the field value is invalid. */
#define FILL 0xfd
#define NONE 0xff
/* johabidx_*: map a 5-bit Johab field to the zero-based index of the
 * initial consonant / vowel / final consonant used in the syllable
 * formula 0xAC00 + cho*588 + jung*28 + jong. */
static const unsigned char johabidx_choseong[32] = {
NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabidx_jungseong[32] = {
NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
static const unsigned char johabidx_jongseong[32] = {
NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};
/* johabjamo_*: map a 5-bit Johab field to the low byte of the stand-alone
 * jamo; the decoder ORs the value with 0x3100. */
static const unsigned char johabjamo_choseong[32] = {
NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabjamo_jungseong[32] = {
NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
static const unsigned char johabjamo_jongseong[32] = {
NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};
/*
 * Johab decoder.
 *
 * Bytes below 0x80 are copied through.  For two-byte sequences:
 *   - lead byte below 0xD8: the 16 bits are split into three 5-bit
 *     fields.  All three filled -> U+3000; exactly one non-fill field ->
 *     the corresponding stand-alone jamo (0x3100 | table value); initial
 *     and vowel present -> a composed Hangul syllable; any other
 *     combination is invalid;
 *   - lead byte 0xD8 and above: the pair is validated, converted to
 *     KS X 1001 row/cell form and looked up in the ksx1001 table.
 * Returns 0 when the input is exhausted and 2 for an invalid or unmapped
 * sequence.
 *
 * NOTE(review): helper macros (INn, OUT1, NEXT, REQUIRE_*, TRYMAP_DEC) are
 * defined outside this section and may return early -- confirm in
 * cjkcodecs.h.
 */
DECODER(johab)
{
while (inleft > 0) {
unsigned char c = IN1, c2;
REQUIRE_OUTBUF(1)
/* Single-byte (ASCII) case. */
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
c2 = IN2;
if (c < 0xd8) {
/* johab hangul */
unsigned char c_cho, c_jung, c_jong;
unsigned char i_cho, i_jung, i_jong;
/* Extract the three 5-bit fields from the 16-bit code. */
c_cho = (c >> 2) & 0x1f;
c_jung = ((c << 3) | c2 >> 5) & 0x1f;
c_jong = c2 & 0x1f;
i_cho = johabidx_choseong[c_cho];
i_jung = johabidx_jungseong[c_jung];
i_jong = johabidx_jongseong[c_jong];
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
return 2;
/* we don't use U+1100 hangul jamo yet. */
if (i_cho == FILL) {
if (i_jung == FILL) {
if (i_jong == FILL)
OUT1(0x3000)
else
OUT1(0x3100 |
johabjamo_jongseong[c_jong])
}
else {
if (i_jong == FILL)
OUT1(0x3100 |
johabjamo_jungseong[c_jung])
else
return 2;
}
} else {
if (i_jung == FILL) {
if (i_jong == FILL)
OUT1(0x3100 |
johabjamo_choseong[c_cho])
else
return 2;
}
else
/* Full syllable; a filled final field counts as index 0. */
OUT1(0xac00 +
i_cho * 588 +
i_jung * 28 +
(i_jong == FILL ? 0 : i_jong))
}
NEXT(2, 1)
} else {
/* KS X 1001 except hangul jamos and syllables */
if (c == 0xdf || c > 0xf9 ||
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
(c2 & 0x7f) == 0x7f ||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
return 2;
else {
unsigned char t1, t2;
/* Johab bytes -> KS X 1001 row (t1) and cell (t2). */
t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
2 * c - 0x197);
t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
else return 2;
NEXT(2, 1)
}
}
}
return 0;
}
#undef NONE
#undef FILL
/* Mapping tables published by this module.
 * NOTE(review): judging by the macro names, DECONLY / ENCONLY register a
 * decode-only or encode-only table -- confirm in cjkcodecs.h. */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(ksx1001)
MAPPING_ENCONLY(cp949)
MAPPING_DECONLY(cp949ext)
END_MAPPINGS_LIST
/* Codecs published by this module; none of them defines init or reset
 * functions. */
BEGIN_CODECS_LIST
CODEC_STATELESS(euc_kr)
CODEC_STATELESS(cp949)
CODEC_STATELESS(johab)
END_CODECS_LIST
/* NOTE(review): presumably expands to the module init for _codecs_kr. */
I_AM_A_MODULE_FOR(kr)

View File

@ -0,0 +1,132 @@
/*
* _codecs_tw.c: Codecs collection for Taiwan's encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#include "cjkcodecs.h"
#include "mappings_tw.h"
/*
* BIG5 codec
*/
/*
 * Big5 encoder.
 *
 * Characters below 0x80 are written as one byte; others are looked up in
 * the big5 table and written as two bytes exactly as stored.  Returns 0
 * when the input is exhausted and 1 when the character has no mapping.
 *
 * NOTE(review): helper macros (OUTn, NEXT, REQUIRE_OUTBUF, UCS4INVALID,
 * TRYMAP_ENC) are defined outside this section and may return early --
 * confirm in cjkcodecs.h.
 */
ENCODER(big5)
{
while (inleft > 0) {
Py_UNICODE c = **inbuf;
DBCHAR code;
/* ASCII range: single byte. */
if (c < 0x80) {
REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c;
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* The `else` below belongs to the `if` inside TRYMAP_ENC. */
TRYMAP_ENC(big5, code, c);
else return 1;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(1, 2)
}
return 0;
}
/*
 * Big5 decoder.
 *
 * Bytes below 0x80 are copied through; any other byte starts a two-byte
 * sequence looked up in the big5 table.  Returns 0 when the input is
 * exhausted and 2 when the pair has no mapping.
 *
 * NOTE(review): helper macros (INn, OUT1, NEXT, REQUIRE_*, TRYMAP_DEC) are
 * defined outside this section and may return early -- confirm in
 * cjkcodecs.h.
 */
DECODER(big5)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
/* Single-byte (ASCII) case. */
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
/* On a table hit the result is stored directly into the output slot. */
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
}
else return 2;
}
return 0;
}
/*
* CP950 codec
*/
/*
 * CP950 encoder.
 *
 * Characters below 0x80 are written as one byte; others are looked up in
 * cp950ext first and big5 second, and written as two bytes exactly as
 * stored.  Returns 0 when the input is exhausted and 1 when the character
 * has no mapping.
 *
 * NOTE(review): helper macros (IN1, WRITE1, OUTn, NEXT, REQUIRE_OUTBUF,
 * UCS4INVALID, TRYMAP_ENC) are defined outside this section and may return
 * early -- confirm in cjkcodecs.h.
 */
ENCODER(cp950)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
/* ASCII range: single byte. */
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* The extension table takes precedence over plain big5. */
TRYMAP_ENC(cp950ext, code, c);
else TRYMAP_ENC(big5, code, c);
else return 1;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(1, 2)
}
return 0;
}
/*
 * CP950 decoder.
 *
 * Bytes below 0x80 are copied through; two-byte sequences are looked up in
 * cp950ext first and big5 second.  Returns 0 when the input is exhausted
 * and 2 when the pair has no mapping.
 *
 * NOTE(review): helper macros (INn, OUT1, NEXT, REQUIRE_*, TRYMAP_DEC) are
 * defined outside this section and may return early -- confirm in
 * cjkcodecs.h.
 */
DECODER(cp950)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
/* Single-byte (ASCII) case. */
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
/* The extension table takes precedence over plain big5. */
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
else TRYMAP_DEC(big5, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
/* Mapping tables published by this module.
 * NOTE(review): judging by the macro name, ENCDEC registers both the
 * encode and the decode table -- confirm in cjkcodecs.h. */
BEGIN_MAPPINGS_LIST
MAPPING_ENCDEC(big5)
MAPPING_ENCDEC(cp950ext)
END_MAPPINGS_LIST
/* Codecs published by this module; neither defines init or reset
 * functions. */
BEGIN_CODECS_LIST
CODEC_STATELESS(big5)
CODEC_STATELESS(cp950)
END_CODECS_LIST
/* NOTE(review): presumably expands to the module init for _codecs_tw. */
I_AM_A_MODULE_FOR(tw)

View File

@ -0,0 +1,24 @@
/* All macros in this file expand to an if / else-if chain WITHOUT a final
 * else, so a caller can continue the chain with its own `else ...`. */

/* JIS X 0201 Roman, Unicode -> byte: values below 0x80 map to themselves
 * except 0x5C and 0x7E, which are not assigned here; U+00A5 maps to 0x5C
 * and U+203E to 0x7E. */
#define JISX0201_R_ENCODE(c, assi) \
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
(assi) = (c); \
else if ((c) == 0x00a5) (assi) = 0x5c; \
else if ((c) == 0x203e) (assi) = 0x7e;
/* JIS X 0201 Katakana, Unicode -> byte: U+FF61..U+FF9F map to
 * 0xA1..0xDF. */
#define JISX0201_K_ENCODE(c, assi) \
if ((c) >= 0xff61 && (c) <= 0xff9f) \
(assi) = (c) - 0xfec0;
/* Roman first, then Katakana. */
#define JISX0201_ENCODE(c, assi) \
JISX0201_R_ENCODE(c, assi) \
else JISX0201_K_ENCODE(c, assi)
/* JIS X 0201 Roman, byte -> Unicode: 0x5C becomes U+00A5, 0x7E becomes
 * U+203E, every other value up to 0x7F maps to itself; values of 0x80 and
 * above are not assigned here. */
#define JISX0201_R_DECODE(c, assi) \
if ((c) < 0x5c) (assi) = (c); \
else if ((c) == 0x5c) (assi) = 0x00a5; \
else if ((c) < 0x7e) (assi) = (c); \
else if ((c) == 0x7e) (assi) = 0x203e; \
else if ((c) == 0x7f) (assi) = 0x7f;
/* JIS X 0201 Katakana, byte -> Unicode: 0xA1..0xDF map to
 * U+FF61..U+FF9F. */
#define JISX0201_K_DECODE(c, assi) \
if ((c) >= 0xa1 && (c) <= 0xdf) \
(assi) = 0xfec0 + (c);
/* Roman first, then Katakana. */
#define JISX0201_DECODE(c, assi) \
JISX0201_R_DECODE(c, assi) \
else JISX0201_K_DECODE(c, assi)

View File

@ -0,0 +1,398 @@
/*
* cjkcodecs.h: common header for cjkcodecs
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#ifndef _CJKCODECS_H_
#define _CJKCODECS_H_
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "multibytecodec.h"
/* a unicode "undefined" code point */
#define UNIINV 0xFFFE
/* internal-use DBCS code points which aren't used by any charsets */
#define NOCHAR 0xFFFF
#define MULTIC 0xFFFE
#define DBCINV 0xFFFD
/* shorter macros to save source size of mapping tables */
#define U UNIINV
#define N NOCHAR
#define M MULTIC
#define D DBCINV
/* One row of a decode table.  _TRYMAP_DEC reads map[c2 - bottom] for a trail
   byte c2 with bottom <= c2 <= top; a NULL map means the whole row is
   unmapped. */
struct dbcs_index {
const ucs2_t *map;
unsigned char bottom, top;
};
typedef struct dbcs_index decode_map;
/* Same layout as dbcs_index, but the entries are ucs4_t values. */
struct widedbcs_index {
const ucs4_t *map;
unsigned char bottom, top;
};
typedef struct widedbcs_index widedecode_map;
/* One row of an encode table.  TRYMAP_ENC_COND selects the row with
   (uni >> 8) and reads map[(uni & 0xff) - bottom], subject to the same
   bottom/top bounds and NULL-map convention as the decode rows. */
struct unim_index {
const DBCHAR *map;
unsigned char bottom, top;
};
typedef struct unim_index encode_map;
/* Encode-table row whose entries are raw bytes.
   NOTE(review): likely the row type consumed by _TRYMAP_ENC_MPLANE, which
   reads three consecutive bytes per entry -- confirm at the use sites. */
struct unim_index_bytebased {
const unsigned char *map;
unsigned char bottom, top;
};
/* A named charset with its encode and decode tables.  Either table pointer
   may be NULL (see MAPPING_ENCONLY / MAPPING_DECONLY). */
struct dbcs_map {
const char *charset;
const struct unim_index *encmap;
const struct dbcs_index *decmap;
};
/* Entry for find_pairencmap(): uniseq is compared against
   (body << 16 | modifier) and code is the DBCS value returned on a match. */
struct pair_encodemap {
ucs4_t uniseq;
DBCHAR code;
};
/* Tentative declarations so the helper functions later in this header can
   refer to the tables; END_CODECS_LIST and END_MAPPINGS_LIST provide the
   initialised definitions in each codec source file. */
static const MultibyteCodec *codec_list;
static const struct dbcs_map *mapping_list;
/* Each macro below expands to the signature of a static hook function named
   <encoding>_<hook>; the function body follows the macro invocation.  The
   parameter names they introduce (state, config, inbuf, inleft, outbuf,
   outleft, flags) are the ones the buffer macros in this header operate on. */
#define CODEC_INIT(encoding) \
static int encoding##_codec_init(const void *config)
#define ENCODER_INIT(encoding) \
static int encoding##_encode_init( \
MultibyteCodec_State *state, const void *config)
#define ENCODER(encoding) \
static Py_ssize_t encoding##_encode( \
MultibyteCodec_State *state, const void *config, \
const Py_UNICODE **inbuf, Py_ssize_t inleft, \
unsigned char **outbuf, Py_ssize_t outleft, int flags)
#define ENCODER_RESET(encoding) \
static Py_ssize_t encoding##_encode_reset( \
MultibyteCodec_State *state, const void *config, \
unsigned char **outbuf, Py_ssize_t outleft)
#define DECODER_INIT(encoding) \
static int encoding##_decode_init( \
MultibyteCodec_State *state, const void *config)
#define DECODER(encoding) \
static Py_ssize_t encoding##_decode( \
MultibyteCodec_State *state, const void *config, \
const unsigned char **inbuf, Py_ssize_t inleft, \
Py_UNICODE **outbuf, Py_ssize_t outleft)
#define DECODER_RESET(encoding) \
static Py_ssize_t encoding##_decode_reset( \
MultibyteCodec_State *state, const void *config)
/* UCS4INVALID(code): with a 4-byte Py_UNICODE, makes the enclosing function
   return 1 for any code point above 0xFFFF.  With any other Py_UNICODE size
   it expands to a statement that does nothing. */
#if Py_UNICODE_SIZE == 4
#define UCS4INVALID(code) \
if ((code) > 0xFFFF) \
return 1;
#else
#define UCS4INVALID(code) \
if (0) ;
#endif
/* Buffer helpers.  They operate on the inbuf/inleft/outbuf/outleft
   parameters introduced by ENCODER()/DECODER(), and every one of them expands
   to complete statements that already end in ';' -- call sites therefore
   write them without a trailing semicolon. */
/* Advance the input cursor by i units and shrink the remaining count. */
#define NEXT_IN(i) \
(*inbuf) += (i); \
(inleft) -= (i);
/* Advance the output cursor by o units and shrink the remaining space. */
#define NEXT_OUT(o) \
(*outbuf) += (o); \
(outleft) -= (o);
#define NEXT(i, o) \
NEXT_IN(i) NEXT_OUT(o)
/* Return MBERR_TOOFEW from the enclosing function unless at least n input
   units remain. */
#define REQUIRE_INBUF(n) \
if (inleft < (n)) \
return MBERR_TOOFEW;
/* Return MBERR_TOOSMALL from the enclosing function unless at least n output
   units are free. */
#define REQUIRE_OUTBUF(n) \
if (outleft < (n)) \
return MBERR_TOOSMALL;
/* Peek at the next input units without consuming them. */
#define IN1 ((*inbuf)[0])
#define IN2 ((*inbuf)[1])
#define IN3 ((*inbuf)[2])
#define IN4 ((*inbuf)[3])
/* Store into an output slot; no space check and no cursor advance. */
#define OUT1(c) ((*outbuf)[0]) = (c);
#define OUT2(c) ((*outbuf)[1]) = (c);
#define OUT3(c) ((*outbuf)[2]) = (c);
#define OUT4(c) ((*outbuf)[3]) = (c);
/* WRITEn: check for n free output slots, then store n values.  The cursor is
   NOT advanced; callers follow up with NEXT()/NEXT_OUT(). */
#define WRITE1(c1) \
REQUIRE_OUTBUF(1) \
(*outbuf)[0] = (c1);
#define WRITE2(c1, c2) \
REQUIRE_OUTBUF(2) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2);
#define WRITE3(c1, c2, c3) \
REQUIRE_OUTBUF(3) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3);
#define WRITE4(c1, c2, c3, c4) \
REQUIRE_OUTBUF(4) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3); \
(*outbuf)[3] = (c4);
/* WRITEUCS4(c): emit one code point into a Py_UNICODE output buffer and
   advance the output cursor.  With a 2-byte Py_UNICODE the value is split
   into a 0xd800-based and a 0xdc00-based unit (the arithmetic assumes
   c >= 0x10000); otherwise it is stored as a single unit.  Unlike the WRITEn
   macros, the expansion ends in NEXT_OUT(), so the cursor IS advanced. */
#if Py_UNICODE_SIZE == 2
# define WRITEUCS4(c) \
REQUIRE_OUTBUF(2) \
(*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
(*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
NEXT_OUT(2)
#else
# define WRITEUCS4(c) \
REQUIRE_OUTBUF(1) \
**outbuf = (Py_UNICODE)(c); \
NEXT_OUT(1)
#endif
/* Table lookup helpers.  The underscore-prefixed macros are parenthesised
   boolean expressions; as a side effect they assign the table entry to assi
   (even when that entry turns out to be the "unmapped" sentinel).  The
   TRYMAP_* wrappers prepend a bare "if", so a call site supplies the
   statement that follows and may chain "else ..." after it. */
/* True when row m has a map, val lies within [bottom, top] and the entry is
   not NOCHAR. */
#define _TRYMAP_ENC(m, assi, val) \
((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
(m)->bottom]) != NOCHAR)
/* Encode lookup as an expression: row = uni >> 8, column = uni & 0xff. */
#define TRYMAP_ENC_COND(charset, assi, uni) \
_TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
#define TRYMAP_ENC(charset, assi, uni) \
if TRYMAP_ENC_COND(charset, assi, uni)
/* Decode counterpart of _TRYMAP_ENC; the unmapped sentinel is UNIINV. */
#define _TRYMAP_DEC(m, assi, val) \
((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
(m)->bottom]) != UNIINV)
/* Decode lookup: row = c1, column = c2. */
#define TRYMAP_DEC(charset, assi, c1, c2) \
if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
/* Multi-plane encode lookup: each entry is three consecutive map elements
   (plane, high, low).  A plane value of 0 means "unmapped"; the ", 1"
   comma expressions keep the && chain true while storing high and low. */
#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && \
((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
(((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
(((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
assplane, asshi, asslo, (uni) & 0xff)
/* Multi-plane decode lookup: the decode table is indexed [plane][c1]. */
#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
/* DECODE_SURROGATE(c): with a 2-byte Py_UNICODE, if c is a high surrogate
   and the next input unit (IN2) is a low surrogate, overwrite c with the
   combined code point.  c must therefore be a modifiable lvalue wide enough
   to hold the result.  If c is a high surrogate but fewer than two input
   units remain, the enclosing function returns MBERR_TOOFEW.  The input
   cursor is not advanced here; GET_INSIZE(c) gives the number of input units
   the (possibly combined) value occupies.
   NOTE(review): c is used unparenthesised in the expansion, so pass a plain
   variable, not an expression. */
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c) \
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
REQUIRE_INBUF(2) \
if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
((ucs4_t)(IN2) - 0xdc00); \
} \
}
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
#else
/* Other Py_UNICODE sizes: nothing to combine, every value is one unit. */
#define DECODE_SURROGATE(c) {;}
#define GET_INSIZE(c) 1
#endif
/* Macros for declaring a module's mapping table list.  Usage:
     BEGIN_MAPPINGS_LIST  MAPPING_xxx(name) ...  END_MAPPINGS_LIST
   Each MAPPING_* entry names the charset with the stringised identifier and
   points at <name>_encmap and/or <name>_decmap.  END_MAPPINGS_LIST appends a
   terminator whose charset is "" (the condition register_maps() stops on)
   and defines mapping_list. */
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
#define END_MAPPINGS_LIST \
{"", NULL, NULL} }; \
static const struct dbcs_map *mapping_list = \
(const struct dbcs_map *)_mapping_list;
/* Macros for declaring a module's codec list, used the same way:
     BEGIN_CODECS_LIST  CODEC_xxx(name) ...  END_CODECS_LIST
   The initialisers are positional and follow the field order of
   MultibyteCodec: encoding, config, codecinit, then the six hooks. */
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
/* All six encode/decode hooks are provided by the codec. */
#define _STATEFUL_METHODS(enc) \
enc##_encode, \
enc##_encode_init, \
enc##_encode_reset, \
enc##_decode, \
enc##_decode_init, \
enc##_decode_reset,
/* Only encode and decode are provided; init and reset hooks are NULL. */
#define _STATELESS_METHODS(enc) \
enc##_encode, NULL, NULL, \
enc##_decode, NULL, NULL,
#define CODEC_STATEFUL(enc) { \
#enc, NULL, NULL, \
_STATEFUL_METHODS(enc) \
},
#define CODEC_STATELESS(enc) { \
#enc, NULL, NULL, \
_STATELESS_METHODS(enc) \
},
/* Stateless codec that additionally registers <enc>_codec_init. */
#define CODEC_STATELESS_WINIT(enc) { \
#enc, NULL, \
enc##_codec_init, \
_STATELESS_METHODS(enc) \
},
/* Terminator entry has encoding "" (the condition getcodec() stops on);
   then defines codec_list. */
#define END_CODECS_LIST \
{"", NULL,} }; \
static const MultibyteCodec *codec_list = \
(const MultibyteCodec *)_codec_list;
/* Return the "__create_codec" callable of the _multibytecodec module.
 *
 * The object is looked up on first use and remembered in a function-level
 * static; the caller receives that cached pointer without a new reference.
 * Returns NULL (with the exception left by the import or attribute lookup)
 * on failure, in which case the lookup is retried on the next call. */
static PyObject *
getmultibytecodec(void)
{
    static PyObject *cofunc = NULL;
    PyObject *module;

    if (cofunc != NULL)
        return cofunc;

    module = PyImport_ImportModuleNoBlock("_multibytecodec");
    if (module == NULL)
        return NULL;

    cofunc = PyObject_GetAttrString(module, "__create_codec");
    Py_DECREF(module);
    return cofunc;
}
/* getcodec(encoding) -> codec object
 *
 * Look up `encoding` (a str) in this module's codec_list, wrap the matching
 * MultibyteCodec entry in a capsule and pass it to the "__create_codec"
 * callable obtained from getmultibytecodec().
 *
 * Raises TypeError when `encoding` is not a string and LookupError when no
 * entry has that name; other failures propagate from the calls made here.
 * `self` is unused. */
static PyObject *
getcodec(PyObject *self, PyObject *encoding)
{
    const MultibyteCodec *entry;
    const char *wanted;
    PyObject *factory, *capsule, *result;

    if (!PyString_Check(encoding)) {
        PyErr_SetString(PyExc_TypeError,
                        "encoding name must be a string.");
        return NULL;
    }

    factory = getmultibytecodec();
    if (factory == NULL)
        return NULL;

    /* Linear scan; the list ends with an entry whose name is "". */
    wanted = PyString_AS_STRING(encoding);
    entry = codec_list;
    while (entry->encoding[0] != '\0' &&
           strcmp(entry->encoding, wanted) != 0)
        entry++;

    if (entry->encoding[0] == '\0') {
        PyErr_SetString(PyExc_LookupError,
                        "no such codec is supported.");
        return NULL;
    }

    capsule = PyCapsule_New((void *)entry, PyMultibyteCodec_CAPSULE_NAME, NULL);
    if (capsule == NULL)
        return NULL;

    result = PyObject_CallFunctionObjArgs(factory, capsule, NULL);
    Py_DECREF(capsule);
    return result;
}
/* Method table passed to Py_InitModule() by I_AM_A_MODULE_FOR(): the only
 * module-level function is getcodec(), which takes exactly one argument. */
static struct PyMethodDef __methods[] = {
    { "getcodec", (PyCFunction)getcodec, METH_O, "" },
    { NULL, NULL, 0, NULL }     /* sentinel */
};
/* Publish every table in mapping_list as a module attribute.
 *
 * For each entry a capsule wrapping the dbcs_map pointer is added to
 * `module` under the name "__map_<charset>".
 *
 * Returns 0 on success, -1 with an exception set on the first failure.
 *
 * PyModule_AddObject() only takes ownership of the value when it succeeds,
 * so the capsule is released here when the call fails.  The attribute name
 * is also length-checked before it is copied into the fixed-size buffer. */
static int
register_maps(PyObject *module)
{
    const struct dbcs_map *h;

    for (h = mapping_list; h->charset[0] != '\0'; h++) {
        char mhname[256] = "__map_";
        const size_t prefixlen = sizeof("__map_") - 1;
        PyObject *capsule;

        /* room is needed for the charset name plus its terminating NUL */
        if (strlen(h->charset) >= sizeof(mhname) - prefixlen) {
            PyErr_SetString(PyExc_ValueError,
                            "mapping name is too long.");
            return -1;
        }
        strcpy(mhname + prefixlen, h->charset);

        capsule = PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME,
                                NULL);
        if (capsule == NULL)
            return -1;

        if (PyModule_AddObject(module, mhname, capsule) == -1) {
            Py_DECREF(capsule);     /* not stolen on failure */
            return -1;
        }
    }

    return 0;
}
#ifdef USING_BINARY_PAIR_SEARCH
/* Binary-search `haystack` for the (body, modifier) pair.
 *
 * The key is (body << 16 | modifier), compared against each entry's uniseq,
 * so `haystack` must be sorted in ascending uniseq order and hold
 * `haystacksize` entries.
 *
 * Returns the matching entry's code, or DBCINV when there is no match
 * (including when the table is empty). */
static DBCHAR
find_pairencmap(ucs2_t body, ucs2_t modifier,
                const struct pair_encodemap *haystack, int haystacksize)
{
    int pos, min, max;
    /* Widen before shifting: a 16-bit `body` is promoted to (signed) int,
     * and shifting a value >= 0x8000 left by 16 would overflow it. */
    ucs4_t value = ((ucs4_t)body << 16) | modifier;

    /* Nothing to search; also keeps haystack[0] from being read below. */
    if (haystacksize <= 0)
        return DBCINV;

    min = 0;
    max = haystacksize;

    for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) {
        if (value < haystack[pos].uniseq) {
            if (max == pos)
                break;
            max = pos;
        }
        else if (value > haystack[pos].uniseq) {
            /* pos == min means the interval can shrink no further */
            if (min == pos)
                break;
            min = pos;
        }
        else
            break;
    }

    if (value == haystack[pos].uniseq)
        return haystack[pos].code;
    return DBCINV;
}
#endif
#ifdef USING_IMPORTED_MAPS
/* IMPORT_MAP(locale, charset, &enc, &dec): fetch the tables published as
 * "__map_<charset>" by the module "_codecs_<locale>". */
#define IMPORT_MAP(locale, charset, encmap, decmap) \
    importmap("_codecs_" #locale, "__map_" #charset, \
              (const void**)encmap, (const void**)decmap)

/* Import `modname`, read its attribute `symbol` (expected to be a capsule
 * wrapping a struct dbcs_map) and store the map's encode / decode table
 * pointers through `encmap` / `decmap`.  Either output pointer may be NULL
 * to skip that table.
 *
 * Returns 0 on success, -1 with an exception set on failure (import error,
 * missing attribute, or ValueError when the attribute is not a valid
 * capsule).
 *
 * Every exit path releases both the module and the attribute reference;
 * the attribute used to be leaked when it was not a valid capsule. */
static int
importmap(const char *modname, const char *symbol,
          const void **encmap, const void **decmap)
{
    PyObject *mod;
    PyObject *o = NULL;
    const struct dbcs_map *map;
    int rc = -1;

    mod = PyImport_ImportModule((char *)modname);
    if (mod == NULL)
        return -1;

    o = PyObject_GetAttrString(mod, (char*)symbol);
    if (o == NULL)
        goto done;

    if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) {
        PyErr_SetString(PyExc_ValueError,
                        "map data must be a Capsule.");
        goto done;
    }

    map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME);
    if (map == NULL)
        goto done;      /* defensive; exception already set by the call */

    if (encmap != NULL)
        *encmap = map->encmap;
    if (decmap != NULL)
        *decmap = map->decmap;
    rc = 0;

done:
    Py_XDECREF(o);
    Py_DECREF(mod);
    return rc;
}
#endif
/* I_AM_A_MODULE_FOR(loc): defines init_codecs_<loc>(), the initialisation
   function of the extension module "_codecs_<loc>".  It creates the module
   with the shared __methods table and then publishes the mapping tables via
   register_maps().  The result of register_maps() is deliberately discarded
   (the function returns void), so a failure there is not reported by this
   function itself. */
#define I_AM_A_MODULE_FOR(loc) \
void \
init_codecs_##loc(void) \
{ \
PyObject *m = Py_InitModule("_codecs_" #loc, __methods);\
if (m != NULL) \
(void)register_maps(m); \
}
#endif

View File

@ -0,0 +1,43 @@
/* These routines may be quite inefficient, but they are used only to emulate
 * old standards. */

/* Value returned from the enclosing encoder when a character is rejected;
 * may be overridden before including this header. */
#ifndef EMULATE_JISX0213_2000_ENCODE_INVALID
#define EMULATE_JISX0213_2000_ENCODE_INVALID 1
#endif

/* EMULATE_JISX0213_2000_ENCODE_BMP(assi, c)
 * Only active when the enclosing function's `config` equals (void *)2000.
 * For the ten listed code points the enclosing function returns
 * EMULATE_JISX0213_2000_ENCODE_INVALID; for 0x9B1D, assi is set to
 * 0x8000 | 0x7d3b.  The expansion is an if / else-if chain with no final
 * else, so a caller may continue it with its own "else".
 *
 * The last line of the macro must NOT end in a line continuation:
 * otherwise the following #define would be spliced into this macro's body. */
#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c)                       \
    if (config == (void *)2000 && (                                     \
            (c) == 0x9B1C || (c) == 0x4FF1 ||                           \
            (c) == 0x525D || (c) == 0x541E ||                           \
            (c) == 0x5653 || (c) == 0x59F8 ||                           \
            (c) == 0x5C5B || (c) == 0x5E77 ||                           \
            (c) == 0x7626 || (c) == 0x7E6B))                            \
        return EMULATE_JISX0213_2000_ENCODE_INVALID;                    \
    else if (config == (void *)2000 && (c) == 0x9B1D)                   \
        (assi) = 0x8000 | 0x7d3b;

/* EMULATE_JISX0213_2000_ENCODE_EMP(assi, c)
 * With config == (void *)2000, code point 0x20B9F makes the enclosing
 * function return EMULATE_JISX0213_2000_ENCODE_INVALID.  assi is unused. */
#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c)                       \
    if (config == (void *)2000 && (c) == 0x20B9F)                       \
        return EMULATE_JISX0213_2000_ENCODE_INVALID;
/* Value returned from the enclosing decoder when a byte pair is rejected;
   may be overridden before including this header. */
#ifndef EMULATE_JISX0213_2000_DECODE_INVALID
#define EMULATE_JISX0213_2000_DECODE_INVALID 2
#endif
/* With config == (void *)2000, the ten listed (c1, c2) pairs make the
   enclosing function return EMULATE_JISX0213_2000_DECODE_INVALID.
   assi is unused.  Expands to a bare "if" statement, so a caller may append
   an "else". */
#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2) \
if (config == (void *)2000 && \
(((c1) == 0x2E && (c2) == 0x21) || \
((c1) == 0x2F && (c2) == 0x7E) || \
((c1) == 0x4F && (c2) == 0x54) || \
((c1) == 0x4F && (c2) == 0x7E) || \
((c1) == 0x74 && (c2) == 0x27) || \
((c1) == 0x7E && (c2) == 0x7A) || \
((c1) == 0x7E && (c2) == 0x7B) || \
((c1) == 0x7E && (c2) == 0x7C) || \
((c1) == 0x7E && (c2) == 0x7D) || \
((c1) == 0x7E && (c2) == 0x7E))) \
return EMULATE_JISX0213_2000_DECODE_INVALID;
/* With config == (void *)2000, the pair (0x7D, 0x3B) decodes to 0x9B1D --
   the inverse of the 0x9B1D case in EMULATE_JISX0213_2000_ENCODE_BMP. */
#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \
if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
(assi) = 0x9B1D;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,59 @@
/* Number of entries in jisx0213_pair_encmap. */
#define JISX0213_ENCPAIRS 46
/* With EXTERN_JISX0213_PAIR defined, only uninitialised pointers are declared
   here.  NOTE(review): presumably they are filled in at run time from the
   tables another module publishes -- confirm at the use site. */
#ifdef EXTERN_JISX0213_PAIR
static const struct widedbcs_index *jisx0213_pair_decmap;
static const struct pair_encodemap *jisx0213_pair_encmap;
#else
/* Backing storage for the rows of jisx0213_pair_decmap.  Each mapped value
   packs two 16-bit code units as (first << 16 | second), e.g.
   810234010 == 0x304B309A; U (UNIINV) marks an unmapped slot. */
static const ucs4_t __jisx0213_pair_decmap[49] = {
810234010,810365082,810496154,810627226,810758298,816525466,816656538,
816787610,816918682,817049754,817574042,818163866,818426010,838283418,
15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,
39453441,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,48825061,48562921,
};
/* 256-row decode index.  Each row is {map, bottom, top}; {0,0,0} is a row
   with no map.  The four populated rows point into __jisx0213_pair_decmap
   and give the range of second-byte values that row covers. */
static const struct widedbcs_index jisx0213_pair_decmap[256] = {
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap
+0,119,123},{__jisx0213_pair_decmap+5,119,126},{__jisx0213_pair_decmap+13,120,
120},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap+14,68,102},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
};
/* Encode table for two-code-unit sequences: {uniseq, code}.  uniseq packs
   (first << 16 | second); entries whose low half is 0 give the code for the
   first unit on its own.  The entries are listed in ascending uniseq order.
   NOTE(review): that ordering is what a binary search such as
   find_pairencmap() needs -- keep it sorted when editing. */
static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {
{0x00e60000,0x295c},{0x00e60300,0x2b44},{0x02540000,0x2b38},{0x02540300,0x2b48
},{0x02540301,0x2b49},{0x02590000,0x2b30},{0x02590300,0x2b4c},{0x02590301,
0x2b4d},{0x025a0000,0x2b43},{0x025a0300,0x2b4e},{0x025a0301,0x2b4f},{
0x028c0000,0x2b37},{0x028c0300,0x2b4a},{0x028c0301,0x2b4b},{0x02e50000,0x2b60
},{0x02e502e9,0x2b66},{0x02e90000,0x2b64},{0x02e902e5,0x2b65},{0x304b0000,
0x242b},{0x304b309a,0x2477},{0x304d0000,0x242d},{0x304d309a,0x2478},{
0x304f0000,0x242f},{0x304f309a,0x2479},{0x30510000,0x2431},{0x3051309a,0x247a
},{0x30530000,0x2433},{0x3053309a,0x247b},{0x30ab0000,0x252b},{0x30ab309a,
0x2577},{0x30ad0000,0x252d},{0x30ad309a,0x2578},{0x30af0000,0x252f},{
0x30af309a,0x2579},{0x30b10000,0x2531},{0x30b1309a,0x257a},{0x30b30000,0x2533
},{0x30b3309a,0x257b},{0x30bb0000,0x253b},{0x30bb309a,0x257c},{0x30c40000,
0x2544},{0x30c4309a,0x257d},{0x30c80000,0x2548},{0x30c8309a,0x257e},{
0x31f70000,0x2675},{0x31f7309a,0x2678},
};
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,141 @@
/*
* multibytecodec.h: Common Multibyte Codec Implementation
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#ifndef _PYTHON_MULTIBYTECODEC_H_
#define _PYTHON_MULTIBYTECODEC_H_
#ifdef __cplusplus
extern "C" {
#endif
/* Scalar types used by the mapping tables.
   NOTE(review): #ifdef only detects macros.  Where uint32_t / uint16_t are
   plain typedefs the #else branches are taken, so the code then relies on
   unsigned int being 32 bits and unsigned short being 16 bits -- confirm for
   the target platform. */
#ifdef uint32_t
typedef uint32_t ucs4_t;
#else
typedef unsigned int ucs4_t;
#endif
#ifdef uint16_t
typedef uint16_t ucs2_t, DBCHAR;
#else
typedef unsigned short ucs2_t, DBCHAR;
#endif
/* Per-conversion scratch state handed to every codec hook.  A codec picks
   whichever view of the storage suits it: a pointer, an int, eight bytes,
   four ucs2_t or two ucs4_t values. */
typedef union {
void *p;
int i;
unsigned char c[8];
ucs2_t u2[4];
ucs4_t u4[2];
} MultibyteCodec_State;
/* Hook signatures.  They match, parameter for parameter, the functions that
   the CODEC_INIT / ENCODER* / DECODER* macros in cjkcodecs.h declare. */
/* One-time codec initialisation. */
typedef int (*mbcodec_init)(const void *config);
/* Encode from *inbuf (inleft units) into *outbuf (outleft bytes free).
   flags carries MBENC_* bits. */
typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state,
const void *config,
const Py_UNICODE **inbuf, Py_ssize_t inleft,
unsigned char **outbuf, Py_ssize_t outleft,
int flags);
/* Initialise encoder state. */
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
const void *config);
/* Reset encoder state; receives an output buffer, so it may emit bytes. */
typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state,
const void *config,
unsigned char **outbuf, Py_ssize_t outleft);
/* Decode from *inbuf (inleft bytes) into *outbuf (outleft units free). */
typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
const void *config,
const unsigned char **inbuf, Py_ssize_t inleft,
Py_UNICODE **outbuf, Py_ssize_t outleft);
/* Initialise decoder state. */
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
const void *config);
/* Reset decoder state; no buffers are passed. */
typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
const void *config);
/* Description of one codec.  The CODEC_* macros in cjkcodecs.h initialise
   this struct positionally, so the field order here must not change.  Hooks
   a codec does not provide are NULL. */
typedef struct {
const char *encoding;
const void *config;
mbcodec_init codecinit;
mbencode_func encode;
mbencodeinit_func encinit;
mbencodereset_func encreset;
mbdecode_func decode;
mbdecodeinit_func decinit;
mbdecodereset_func decreset;
} MultibyteCodec;
/* Python object wrapping a MultibyteCodec description. */
typedef struct {
PyObject_HEAD
MultibyteCodec *codec;
} MultibyteCodecObject;
#define MultibyteCodec_Check(op) ((op)->ob_type == &MultibyteCodec_Type)
/* The *_HEAD macros below give the stateful object structs a common leading
   layout, so every encoder-like (or decoder-like) object starts with the
   same fields in the same order. */
/* Common prefix: codec description, conversion state and error policy. */
#define _MultibyteStatefulCodec_HEAD \
PyObject_HEAD \
MultibyteCodec *codec; \
MultibyteCodec_State state; \
PyObject *errors;
typedef struct {
_MultibyteStatefulCodec_HEAD
} MultibyteStatefulCodecContext;
/* Encoder prefix: adds a buffer of up to MAXENCPENDING not-yet-encoded
   Py_UNICODE units and the count currently held. */
#define MAXENCPENDING 2
#define _MultibyteStatefulEncoder_HEAD \
_MultibyteStatefulCodec_HEAD \
Py_UNICODE pending[MAXENCPENDING]; \
Py_ssize_t pendingsize;
typedef struct {
_MultibyteStatefulEncoder_HEAD
} MultibyteStatefulEncoderContext;
/* Decoder prefix: adds a buffer of up to MAXDECPENDING not-yet-decoded bytes
   and the count currently held. */
#define MAXDECPENDING 8
#define _MultibyteStatefulDecoder_HEAD \
_MultibyteStatefulCodec_HEAD \
unsigned char pending[MAXDECPENDING]; \
Py_ssize_t pendingsize;
typedef struct {
_MultibyteStatefulDecoder_HEAD
} MultibyteStatefulDecoderContext;
typedef struct {
_MultibyteStatefulEncoder_HEAD
} MultibyteIncrementalEncoderObject;
typedef struct {
_MultibyteStatefulDecoder_HEAD
} MultibyteIncrementalDecoderObject;
/* Stream variants additionally hold the underlying stream object. */
typedef struct {
_MultibyteStatefulDecoder_HEAD
PyObject *stream;
} MultibyteStreamReaderObject;
typedef struct {
_MultibyteStatefulEncoder_HEAD
PyObject *stream;
} MultibyteStreamWriterObject;
/* Return codes of the encode/decode hooks: positive values report an illegal
   sequence; the negative values below report buffer or internal conditions. */
#define MBERR_TOOSMALL (-1) /* insufficient output buffer space */
#define MBERR_TOOFEW (-2) /* incomplete input buffer */
#define MBERR_INTERNAL (-3) /* internal runtime error */
/* Error policy sentinels stored in an `errors` field in place of a real
   object.  ERROR_ISCUSTOM(p) is true for every pointer outside that 1..3
   range -- including NULL, which is why ERROR_DECREF tests for NULL first. */
#define ERROR_STRICT (PyObject *)(1)
#define ERROR_IGNORE (PyObject *)(2)
#define ERROR_REPLACE (PyObject *)(3)
#define ERROR_ISCUSTOM(p) ((p) < ERROR_STRICT || ERROR_REPLACE < (p))
/* Release p only when it is a real (non-NULL, non-sentinel) object.
   NOTE(review): the expansion already ends in ';' after "while (0)", so
   "ERROR_DECREF(x);" yields an extra empty statement and the macro is not
   safe as the unbraced body of an if/else. */
#define ERROR_DECREF(p) do { \
if (p != NULL && ERROR_ISCUSTOM(p)) { \
Py_DECREF(p); \
} \
} while (0);
#define MBENC_FLUSH 0x0001 /* encode all characters encodable */
#define MBENC_MAX MBENC_FLUSH
/* Name attached to every capsule that carries codec or mapping data. */
#define PyMultibyteCodec_CAPSULE_NAME "multibytecodec.__map_*"
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,22 @@
Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
and Clark Cooper
Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -0,0 +1,92 @@
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
/* Named constants for ASCII code points: upper-case letters, lower-case
   letters, digits, then whitespace and punctuation. */
#define ASCII_A 0x41
#define ASCII_B 0x42
#define ASCII_C 0x43
#define ASCII_D 0x44
#define ASCII_E 0x45
#define ASCII_F 0x46
#define ASCII_G 0x47
#define ASCII_H 0x48
#define ASCII_I 0x49
#define ASCII_J 0x4A
#define ASCII_K 0x4B
#define ASCII_L 0x4C
#define ASCII_M 0x4D
#define ASCII_N 0x4E
#define ASCII_O 0x4F
#define ASCII_P 0x50
#define ASCII_Q 0x51
#define ASCII_R 0x52
#define ASCII_S 0x53
#define ASCII_T 0x54
#define ASCII_U 0x55
#define ASCII_V 0x56
#define ASCII_W 0x57
#define ASCII_X 0x58
#define ASCII_Y 0x59
#define ASCII_Z 0x5A
#define ASCII_a 0x61
#define ASCII_b 0x62
#define ASCII_c 0x63
#define ASCII_d 0x64
#define ASCII_e 0x65
#define ASCII_f 0x66
#define ASCII_g 0x67
#define ASCII_h 0x68
#define ASCII_i 0x69
#define ASCII_j 0x6A
#define ASCII_k 0x6B
#define ASCII_l 0x6C
#define ASCII_m 0x6D
#define ASCII_n 0x6E
#define ASCII_o 0x6F
#define ASCII_p 0x70
#define ASCII_q 0x71
#define ASCII_r 0x72
#define ASCII_s 0x73
#define ASCII_t 0x74
#define ASCII_u 0x75
#define ASCII_v 0x76
#define ASCII_w 0x77
#define ASCII_x 0x78
#define ASCII_y 0x79
#define ASCII_z 0x7A
#define ASCII_0 0x30
#define ASCII_1 0x31
#define ASCII_2 0x32
#define ASCII_3 0x33
#define ASCII_4 0x34
#define ASCII_5 0x35
#define ASCII_6 0x36
#define ASCII_7 0x37
#define ASCII_8 0x38
#define ASCII_9 0x39
#define ASCII_TAB 0x09
#define ASCII_SPACE 0x20
#define ASCII_EXCL 0x21
#define ASCII_QUOT 0x22
#define ASCII_AMP 0x26
#define ASCII_APOS 0x27
#define ASCII_MINUS 0x2D
#define ASCII_PERIOD 0x2E
#define ASCII_COLON 0x3A
#define ASCII_SEMI 0x3B
#define ASCII_LT 0x3C
#define ASCII_EQUALS 0x3D
#define ASCII_GT 0x3E
#define ASCII_LSQB 0x5B
#define ASCII_RSQB 0x5D
#define ASCII_UNDERSCORE 0x5F
#define ASCII_LPAREN 0x28
#define ASCII_RPAREN 0x29
#define ASCII_FF 0x0C
#define ASCII_SLASH 0x2F
#define ASCII_HASH 0x23
#define ASCII_PIPE 0x7C
#define ASCII_COMMA 0x2C

View File

@ -0,0 +1,36 @@
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
/* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML,
/* 0x0C */ BT_NONXML, BT_CR, BT_NONXML, BT_NONXML,
/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM,
/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS,
/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS,
/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL,
/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI,
/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB,
/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT,
/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER,

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,119 @@
/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifndef Expat_External_INCLUDED
#define Expat_External_INCLUDED 1
/* External API definitions */
/* Namespace external symbols to allow multiple libexpat versions to
co-exist. */
#include "pyexpatns.h"
#if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
#define XML_USE_MSC_EXTENSIONS 1
#endif
/* Expat tries very hard to make the API boundary very specifically
defined. There are two macros defined to control this boundary;
each of these can be defined before including this header to
achieve some different behavior, but doing so is not recommended or
tested frequently.
XMLCALL - The calling convention to use for all calls across the
"library boundary." This will default to cdecl, and
try really hard to tell the compiler that's what we
want.
XMLIMPORT - Whatever magic is needed to note that a function is
to be imported from a dynamically loaded library
(.dll, .so, or .sl, depending on your platform).
The XMLCALL macro was added in Expat 1.95.7. The only one which is
expected to be directly useful in client code is XMLCALL.
Note that on at least some Unix versions, the Expat library must be
compiled with the cdecl calling convention as the default since
system headers may assume the cdecl convention.
*/
#ifndef XMLCALL
#if defined(_MSC_VER)
#define XMLCALL __cdecl
#elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER)
#define XMLCALL __attribute__((cdecl))
#else
/* For any platform which uses this definition and supports more than
one calling convention, we need to extend this definition to
declare the convention used on that platform, if it's possible to
do so.
If this is the case for your platform, please file a bug report
with information on how to identify your platform via the C
pre-processor and how to specify the same calling convention as the
platform's malloc() implementation.
*/
#define XMLCALL
#endif
#endif /* not defined XMLCALL */
/* XMLIMPORT: defined as __declspec(dllimport) only when all of the following
   hold: the caller has not predefined it, XML_STATIC is not defined, Expat
   itself is not being built, and the MSC extensions are in use. */
#if !defined(XML_STATIC) && !defined(XMLIMPORT)
#ifndef XML_BUILDING_EXPAT
/* using Expat from an application */
#ifdef XML_USE_MSC_EXTENSIONS
#define XMLIMPORT __declspec(dllimport)
#endif
#endif
#endif /* not defined XML_STATIC */
/* If we didn't define it above, define it away: */
#ifndef XMLIMPORT
#define XMLIMPORT
#endif
/* Declaration prefix for public API functions returning `type`. */
#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL
#ifdef __cplusplus
extern "C" {
#endif
/* Character types.  XML_UNICODE_WCHAR_T implies XML_UNICODE.
   XML_Char and XML_LChar are:
     wchar_t / wchar_t       with XML_UNICODE_WCHAR_T
     unsigned short / char   with XML_UNICODE alone
     char / char             otherwise */
#ifdef XML_UNICODE_WCHAR_T
#define XML_UNICODE
#endif
#ifdef XML_UNICODE /* Information is UTF-16 encoded. */
#ifdef XML_UNICODE_WCHAR_T
typedef wchar_t XML_Char;
typedef wchar_t XML_LChar;
#else
typedef unsigned short XML_Char;
typedef char XML_LChar;
#endif /* XML_UNICODE_WCHAR_T */
#else /* Information is UTF-8 encoded. */
typedef char XML_Char;
typedef char XML_LChar;
#endif /* XML_UNICODE */
/* Position and size types: 64-bit integer types when XML_LARGE_SIZE is
   defined (__int64 for MSC versions below 1400, long long elsewhere), plain
   long / unsigned long otherwise. */
#ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */
#if defined(XML_USE_MSC_EXTENSIONS) && _MSC_VER < 1400
typedef __int64 XML_Index;
typedef unsigned __int64 XML_Size;
#else
typedef long long XML_Index;
typedef unsigned long long XML_Size;
#endif
#else
typedef long XML_Index;
typedef unsigned long XML_Size;
#endif /* XML_LARGE_SIZE */
#ifdef __cplusplus
}
#endif
#endif /* not Expat_External_INCLUDED */

View File

@ -0,0 +1,37 @@
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
/* Like asciitab.h, except that 0xD has code BT_S rather than BT_CR */
/* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML,
/* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML,
/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM,
/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS,
/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS,
/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL,
/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI,
/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB,
/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT,
/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER,

View File

@ -0,0 +1,73 @@
/* internal.h
Internal definitions used by Expat. This is not needed to compile
client code.
The following calling convention macros are defined for frequently
called functions:
FASTCALL - Used for those internal functions that have a simple
body and a low number of arguments and local variables.
PTRCALL - Used for functions called through function pointers.
PTRFASTCALL - Like PTRCALL, but for low number of arguments.
inline - Used for selected internal functions for which inlining
may improve performance on some platforms.
Note: Use of these macros is based on judgement, not hard rules,
and therefore subject to change.
*/
/* Register-based calling convention, enabled only for GCC targeting i386
   (MinGW excluded).  PTRCALL is intentionally not defined here; it receives
   the empty fallback below. */
#if defined(__GNUC__) && defined(__i386__) && !defined(__MINGW32__)
/* We'll use this version by default only where we know it helps.
regparm() generates warnings on Solaris boxes. See SF bug #692878.
Instability reported with egcs on a RedHat Linux 7.3.
Let's comment out:
#define FASTCALL __attribute__((stdcall, regparm(3)))
and let's try this:
*/
#define FASTCALL __attribute__((regparm(3)))
#define PTRFASTCALL __attribute__((regparm(3)))
#endif
/* Using __fastcall seems to have an unexpected negative effect under
MS VC++, especially for function pointers, so we won't use it for
now on that platform. It may be reconsidered for a future release
if it can be made more effective.
Likely reason: __fastcall on Windows is like stdcall, therefore
the compiler cannot perform stack optimizations for call clusters.
*/
/* Make sure all of these are defined if they aren't already. */
/* Empty fallbacks: on every platform not handled above, the calling
   convention macros expand to nothing. */
#ifndef FASTCALL
#define FASTCALL
#endif
#ifndef PTRCALL
#define PTRCALL
#endif
#ifndef PTRFASTCALL
#define PTRFASTCALL
#endif
/* `inline` handling for C builds: unless XML_MIN_SIZE is defined, GCC gets
   `inline` mapped to __inline. */
#ifndef XML_MIN_SIZE
#if !defined(__cplusplus) && !defined(inline)
#ifdef __GNUC__
#define inline __inline
#endif /* __GNUC__ */
#endif
#endif /* XML_MIN_SIZE */
/* In C++ `inline` is left meaning itself; in C, if nothing above defined it,
   it is defined away so that the keyword can be used unconditionally. */
#ifdef __cplusplus
#define inline inline
#else
#ifndef inline
#define inline
#endif
#endif

View File

@ -0,0 +1,36 @@
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
/* 0x80 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER,
/* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME,
/* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER,
/* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,

View File

@ -0,0 +1,150 @@
/* Bit table holding 320 words (80 rows of 4).
NOTE(review): presumably organised as 40 pages of 8 words (256 bits each),
which would match the page indices 0x00-0x27 stored in nmstrtPages[] and
namePages[] -- confirm against the lookup macro in the including file.
NOTE(review): the values assume `unsigned` is at least 32 bits wide. */
static const unsigned namingBitmap[] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x00000000, 0x04000000, 0x87FFFFFE, 0x07FFFFFE,
0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF,
0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE00F, 0xFC31FFFF,
0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF,
0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF,
0xFFFF0003, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF,
0x00000000, 0x07FFFFFE, 0x000007FE, 0xFFFE0000,
0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060,
0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003,
0xFFF99FE0, 0x03C5FDFF, 0xB0000000, 0x00030003,
0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000,
0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001,
0xFFF99FE0, 0x23CDFDFF, 0xB0000000, 0x00000003,
0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000,
0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003,
0xFFFDDFE0, 0x03EFFDFF, 0x40000000, 0x00000003,
0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFE, 0x000D7FFF, 0x0000003F, 0x00000000,
0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000,
0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF,
0x0007DAED, 0x50000000, 0x82315001, 0x002C62AB,
0x40000000, 0xF580C900, 0x00000007, 0x02010800,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
0x00000000, 0x00004C40, 0x00000000, 0x00000000,
0x00000007, 0x00000000, 0x00000000, 0x00000000,
0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF,
0x001FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x07FFFFFF,
0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE,
0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF,
0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF,
0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003,
0xFFFFD7C0, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF,
0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF,
0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF,
0xFFFFFFFF, 0x7CFFFFFF, 0xFFEF7FFF, 0x03FF3DFF,
0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF,
0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF,
0xFFF987E4, 0xD36DFDFF, 0x5E003987, 0x001FFFC0,
0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1,
0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3,
0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80,
0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3,
0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3,
0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000,
0xFEF02596, 0x3BFF6CAE, 0x03FF3F5F, 0x00000000,
0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF,
0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x1FFF0000, 0x00000002,
0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF,
0x661FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x77FFFFFF,
};
/* 256-entry table of small page numbers (values here range 0x00-0x18).
NOTE(review): presumably indexed by the high byte of a UCS-2 code unit and
used to select a page of namingBitmap[] when testing for a name-start
character -- confirm against the lookup macro in the including file. */
static const unsigned char nmstrtPages[] = {
0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00,
0x00, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13,
0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
/* 256-entry table of small page numbers (values here range 0x00-0x27).
Same layout as nmstrtPages[]; many entries are identical, and the ones that
differ use the higher page numbers 0x19-0x27.
NOTE(review): presumably the name-character counterpart of nmstrtPages[],
selecting a page of namingBitmap[] -- confirm against the lookup macro in
the including file. */
static const unsigned char namePages[] = {
0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00,
0x00, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,
0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13,
0x26, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};

View File

@ -0,0 +1,37 @@
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
/* 0x80 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4,
/* 0xF4 */ BT_LEAD4, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,114 @@
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifndef XmlRole_INCLUDED
#define XmlRole_INCLUDED 1
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
#define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt
#endif
#include "xmltok.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Role codes. XML_ROLE_ERROR is -1 and XML_ROLE_NONE is 0; every later
enumerator takes the next consecutive value. Three enumerators exist only
when XML_DTD is defined, so the numeric value of XML_ROLE_PARAM_ENTITY_REF
differs between XML_DTD and non-XML_DTD builds -- compare by name only.
NOTE(review): presumably these are the values produced by the handler that
XmlTokenRole() invokes -- confirm in the implementation file. */
enum {
XML_ROLE_ERROR = -1,
XML_ROLE_NONE = 0,
XML_ROLE_XML_DECL,
XML_ROLE_INSTANCE_START,
XML_ROLE_DOCTYPE_NONE,
XML_ROLE_DOCTYPE_NAME,
XML_ROLE_DOCTYPE_SYSTEM_ID,
XML_ROLE_DOCTYPE_PUBLIC_ID,
XML_ROLE_DOCTYPE_INTERNAL_SUBSET,
XML_ROLE_DOCTYPE_CLOSE,
XML_ROLE_GENERAL_ENTITY_NAME,
XML_ROLE_PARAM_ENTITY_NAME,
XML_ROLE_ENTITY_NONE,
XML_ROLE_ENTITY_VALUE,
XML_ROLE_ENTITY_SYSTEM_ID,
XML_ROLE_ENTITY_PUBLIC_ID,
XML_ROLE_ENTITY_COMPLETE,
XML_ROLE_ENTITY_NOTATION_NAME,
XML_ROLE_NOTATION_NONE,
XML_ROLE_NOTATION_NAME,
XML_ROLE_NOTATION_SYSTEM_ID,
XML_ROLE_NOTATION_NO_SYSTEM_ID,
XML_ROLE_NOTATION_PUBLIC_ID,
XML_ROLE_ATTRIBUTE_NAME,
XML_ROLE_ATTRIBUTE_TYPE_CDATA,
XML_ROLE_ATTRIBUTE_TYPE_ID,
XML_ROLE_ATTRIBUTE_TYPE_IDREF,
XML_ROLE_ATTRIBUTE_TYPE_IDREFS,
XML_ROLE_ATTRIBUTE_TYPE_ENTITY,
XML_ROLE_ATTRIBUTE_TYPE_ENTITIES,
XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN,
XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS,
XML_ROLE_ATTRIBUTE_ENUM_VALUE,
XML_ROLE_ATTRIBUTE_NOTATION_VALUE,
XML_ROLE_ATTLIST_NONE,
XML_ROLE_ATTLIST_ELEMENT_NAME,
XML_ROLE_IMPLIED_ATTRIBUTE_VALUE,
XML_ROLE_REQUIRED_ATTRIBUTE_VALUE,
XML_ROLE_DEFAULT_ATTRIBUTE_VALUE,
XML_ROLE_FIXED_ATTRIBUTE_VALUE,
XML_ROLE_ELEMENT_NONE,
XML_ROLE_ELEMENT_NAME,
XML_ROLE_CONTENT_ANY,
XML_ROLE_CONTENT_EMPTY,
XML_ROLE_CONTENT_PCDATA,
XML_ROLE_GROUP_OPEN,
XML_ROLE_GROUP_CLOSE,
XML_ROLE_GROUP_CLOSE_REP,
XML_ROLE_GROUP_CLOSE_OPT,
XML_ROLE_GROUP_CLOSE_PLUS,
XML_ROLE_GROUP_CHOICE,
XML_ROLE_GROUP_SEQUENCE,
XML_ROLE_CONTENT_ELEMENT,
XML_ROLE_CONTENT_ELEMENT_REP,
XML_ROLE_CONTENT_ELEMENT_OPT,
XML_ROLE_CONTENT_ELEMENT_PLUS,
XML_ROLE_PI,
XML_ROLE_COMMENT,
#ifdef XML_DTD
/* Only present in XML_DTD builds. */
XML_ROLE_TEXT_DECL,
XML_ROLE_IGNORE_SECT,
XML_ROLE_INNER_PARAM_ENTITY_REF,
#endif /* XML_DTD */
XML_ROLE_PARAM_ENTITY_REF
};
/* Prolog state object. The XmlTokenRole() macro calls `handler` with this
struct as its first argument, followed by the token code, the token's
[ptr, end) byte range and the encoding. The struct layout depends on
XML_DTD: three extra members are present only when it is defined. */
typedef struct prolog_state {
/* Handler invoked for the next token; returns an int.
NOTE(review): presumably a role code, and presumably the handler may
replace itself to advance the state machine -- confirm in the
implementation file. */
int (PTRCALL *handler) (struct prolog_state *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc);
/* NOTE(review): presumably a nesting depth -- confirm in the implementation. */
unsigned level;
/* NOTE(review): presumably the role reported for tokens that carry no
specific role in the current declaration -- confirm in the implementation. */
int role_none;
#ifdef XML_DTD
/* XML_DTD builds only. NOTE(review): meanings inferred from names only
(conditional-section depth, document-entity flag, entity-value flag) --
confirm in the implementation. */
unsigned includeLevel;
int documentEntity;
int inEntityValue;
#endif /* XML_DTD */
} PROLOG_STATE;
/* Initialise a PROLOG_STATE before use (implemented outside this header). */
void XmlPrologStateInit(PROLOG_STATE *);
#ifdef XML_DTD
/* XML_DTD builds only. NOTE(review): presumably the initialiser to use when
the state will process an external entity -- confirm in the implementation. */
void XmlPrologStateInitExternalEntity(PROLOG_STATE *);
#endif /* XML_DTD */
/* Feed one token to the state's current handler and yield the handler's int
result. Note that `state` is evaluated twice, so it must be free of side
effects. */
#define XmlTokenRole(state, tok, ptr, end, enc) \
(((state)->handler)(state, tok, ptr, end, enc))
#ifdef __cplusplus
}
#endif
#endif /* not XmlRole_INCLUDED */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,316 @@
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifndef XmlTok_INCLUDED
#define XmlTok_INCLUDED 1
#ifdef __cplusplus
extern "C" {
#endif
/* The following token may be returned by XmlContentTok */
#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be
start of illegal ]]> sequence */
/* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_NONE -4 /* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan;
might be part of CRLF sequence */
#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */
#define XML_TOK_PARTIAL -1 /* only part of a token */
#define XML_TOK_INVALID 0
/* The following tokens are returned by XmlContentTok; some are also
returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok.
*/
#define XML_TOK_START_TAG_WITH_ATTS 1
#define XML_TOK_START_TAG_NO_ATTS 2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
#define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4
#define XML_TOK_END_TAG 5
#define XML_TOK_DATA_CHARS 6
#define XML_TOK_DATA_NEWLINE 7
#define XML_TOK_CDATA_SECT_OPEN 8
#define XML_TOK_ENTITY_REF 9
#define XML_TOK_CHAR_REF 10 /* numeric character reference */
/* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_PI 11 /* processing instruction */
#define XML_TOK_XML_DECL 12 /* XML decl or text decl */
#define XML_TOK_COMMENT 13
#define XML_TOK_BOM 14 /* Byte order mark */
/* The following tokens are returned only by XmlPrologTok */
#define XML_TOK_PROLOG_S 15
#define XML_TOK_DECL_OPEN 16 /* <!foo */
#define XML_TOK_DECL_CLOSE 17 /* > */
#define XML_TOK_NAME 18
#define XML_TOK_NMTOKEN 19
#define XML_TOK_POUND_NAME 20 /* #name */
#define XML_TOK_OR 21 /* | */
#define XML_TOK_PERCENT 22
#define XML_TOK_OPEN_PAREN 23
#define XML_TOK_CLOSE_PAREN 24
#define XML_TOK_OPEN_BRACKET 25
#define XML_TOK_CLOSE_BRACKET 26
#define XML_TOK_LITERAL 27
#define XML_TOK_PARAM_ENTITY_REF 28
#define XML_TOK_INSTANCE_START 29
/* The following occur only in element type declarations */
#define XML_TOK_NAME_QUESTION 30 /* name? */
#define XML_TOK_NAME_ASTERISK 31 /* name* */
#define XML_TOK_NAME_PLUS 32 /* name+ */
#define XML_TOK_COND_SECT_OPEN 33 /* <![ */
#define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */
#define XML_TOK_COMMA 38
/* The following token is returned only by XmlAttributeValueTok */
#define XML_TOK_ATTRIBUTE_VALUE_S 39
/* The following token is returned only by XmlCdataSectionTok */
#define XML_TOK_CDATA_SECT_CLOSE 40
/* With namespace processing this is returned by XmlPrologTok for a
name with a colon.
*/
#define XML_TOK_PREFIXED_NAME 41
#ifdef XML_DTD
#define XML_TOK_IGNORE_SECT 42
#endif /* XML_DTD */
/* Number of tokenizer states; this sizes the scanners[] array in
struct encoding. XML_DTD builds have one extra state. */
#ifdef XML_DTD
#define XML_N_STATES 4
#else /* not XML_DTD */
#define XML_N_STATES 3
#endif /* not XML_DTD */
/* Indices into scanners[], passed as the `state` argument of XmlTok(). */
#define XML_PROLOG_STATE 0
#define XML_CONTENT_STATE 1
#define XML_CDATA_SECTION_STATE 2
#ifdef XML_DTD
#define XML_IGNORE_SECTION_STATE 3
#endif /* XML_DTD */
/* Number of literal kinds; this sizes literalScanners[] in struct encoding.
The two values below are the indices passed to XmlLiteralTok(). */
#define XML_N_LITERAL_TYPES 2
#define XML_ATTRIBUTE_VALUE_LITERAL 0
#define XML_ENTITY_VALUE_LITERAL 1
/* The size of the buffer passed to XmlUtf8Encode must be at least this. */
#define XML_UTF8_ENCODE_MAX 4
/* The size of the buffer passed to XmlUtf16Encode must be at least this. */
#define XML_UTF16_ENCODE_MAX 2
/* Line/column pair; this is the type updated through XmlUpdatePosition().
XML_Size is declared outside this header. */
typedef struct position {
/* first line and first column are 0 not 1 */
XML_Size lineNumber;
XML_Size columnNumber;
} POSITION;
/* One attribute record; XmlGetAttributes() takes an array of these.
NOTE(review): field meanings are inferred from their names -- `name` points
at the attribute name, [valuePtr, valueEnd) delimits the raw value, and
`normalized` flags whether the value is already in normalised form; confirm
in the getAtts implementation. */
typedef struct {
const char *name;
const char *valuePtr;
const char *valueEnd;
char normalized;
} ATTRIBUTE;
struct encoding;
typedef struct encoding ENCODING;
/* Scanner signature, as called by XmlTok() and XmlLiteralTok():
(enc, ptr, end, nextTokPtr) returning an int token code. */
typedef int (PTRCALL *SCANNER)(const ENCODING *,
const char *,
const char *,
const char **);
/* Per-encoding table of function pointers plus a few properties. Callers are
expected to go through the Xml* wrapper macros defined later in this header
rather than dereference the members directly; every member function
receives the ENCODING itself as its first argument. */
struct encoding {
/* One scanner per tokenizer state, indexed by XML_*_STATE. */
SCANNER scanners[XML_N_STATES];
/* One scanner per literal kind, indexed by XML_*_LITERAL. */
SCANNER literalScanners[XML_N_LITERAL_TYPES];
/* Called via XmlSameName(). */
int (PTRCALL *sameName)(const ENCODING *,
const char *,
const char *);
/* Called via XmlNameMatchesAscii(enc, ptr1, end1, ptr2). */
int (PTRCALL *nameMatchesAscii)(const ENCODING *,
const char *,
const char *,
const char *);
/* Called via XmlNameLength(). */
int (PTRFASTCALL *nameLength)(const ENCODING *, const char *);
/* Called via XmlSkipS(). */
const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *);
/* Called via XmlGetAttributes(); takes an ATTRIBUTE array and its capacity
attsMax. */
int (PTRCALL *getAtts)(const ENCODING *enc,
const char *ptr,
int attsMax,
ATTRIBUTE *atts);
/* Called via XmlCharRefNumber(). */
int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr);
/* Called via XmlPredefinedEntityName(). */
int (PTRCALL *predefinedEntityName)(const ENCODING *,
const char *,
const char *);
/* Called via XmlUpdatePosition(); takes the POSITION to update. */
void (PTRCALL *updatePosition)(const ENCODING *,
const char *ptr,
const char *end,
POSITION *);
/* Called via XmlIsPublicId(); badPtr is an output parameter. */
int (PTRCALL *isPublicId)(const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr);
/* Called via XmlUtf8Convert(). fromP and toP are passed by address.
NOTE(review): presumably both cursors are advanced as data is converted
from [*fromP, fromLim) into [*toP, toLim) -- confirm in the implementation. */
void (PTRCALL *utf8Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
char **toP,
const char *toLim);
/* Called via XmlUtf16Convert(); same shape as utf8Convert but the output is
in unsigned short units. */
void (PTRCALL *utf16Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
unsigned short **toP,
const unsigned short *toLim);
/* NOTE(review): presumably the minimum number of bytes a character occupies
in this encoding -- name-based, confirm in the implementation. */
int minBytesPerChar;
/* NOTE(review): presumably boolean flags for UTF-8 / UTF-16 encodings --
name-based, confirm in the implementation. */
char isUtf8;
char isUtf16;
};
/* Scan the string starting at ptr until the end of the next complete
token, but do not scan past eptr. Return an integer giving the
type of token.
Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set.
Return XML_TOK_PARTIAL when the string does not contain a complete
token; nextTokPtr will not be set.
Return XML_TOK_INVALID when the string does not start a valid
token; nextTokPtr will be set to point to the character which made
the token invalid.
Otherwise the string starts with a valid token; nextTokPtr will be
set to point to the character following the end of that token.
Each data character counts as a single token, but adjacent data
characters may be returned together. Similarly for characters in
the prolog outside literals, comments and processing instructions.
*/
/* Dispatch to the scanner registered for `state` in enc->scanners[].
`enc` is evaluated twice, so it must be free of side effects. */
#define XmlTok(enc, state, ptr, end, nextTokPtr) \
(((enc)->scanners[state])(enc, ptr, end, nextTokPtr))
/* Fixed-state shorthands for XmlTok(). */
#define XmlPrologTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)
#define XmlContentTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)
#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)
#ifdef XML_DTD
#define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr)
#endif /* XML_DTD */
/* This is used for performing a 2nd-level tokenization on the content
of a literal that has already been returned by XmlTok.
*/
#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
(((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)
#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)
/* The remaining macros are thin wrappers: each calls the function pointer
of the corresponding member of *enc and passes enc back as the first
argument. As above, `enc` is evaluated twice in every expansion. */
#define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2))
#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \
(((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2))
#define XmlNameLength(enc, ptr) \
(((enc)->nameLength)(enc, ptr))
#define XmlSkipS(enc, ptr) \
(((enc)->skipS)(enc, ptr))
#define XmlGetAttributes(enc, ptr, attsMax, atts) \
(((enc)->getAtts)(enc, ptr, attsMax, atts))
#define XmlCharRefNumber(enc, ptr) \
(((enc)->charRefNumber)(enc, ptr))
#define XmlPredefinedEntityName(enc, ptr, end) \
(((enc)->predefinedEntityName)(enc, ptr, end))
#define XmlUpdatePosition(enc, ptr, end, pos) \
(((enc)->updatePosition)(enc, ptr, end, pos))
#define XmlIsPublicId(enc, ptr, end, badPtr) \
(((enc)->isPublicId)(enc, ptr, end, badPtr))
#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
(((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))
#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
(((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))
/* Bootstrap encoding object. initEnc is the first member, so a pointer to it
can be cast back to INIT_ENCODING * (the init scanners rely on this).
encPtr is the caller's ENCODING pointer slot recorded by XmlInitEncoding().
NOTE(review): presumably that slot is re-pointed once the real encoding has
been detected -- confirm in the initScan implementation. */
typedef struct {
ENCODING initEnc;
const ENCODING **encPtr;
} INIT_ENCODING;
/* Parse an XML or text declaration in [ptr, end). Results are delivered
through the output pointers; the return type is int.
NOTE(review): presumably non-zero on success, with *badPtr identifying the
offending position on failure -- confirm in the implementation. */
int XmlParseXmlDecl(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr,
int *standalonePtr);
/* Set up *p as a bootstrap encoding for the given encoding name and store
its address through the ENCODING ** argument; returns 0 for an unknown
name and 1 otherwise (see the implementation in the included NS file). */
int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
/* Accessors for the built-in internal encodings. */
const ENCODING *XmlGetUtf8InternalEncoding(void);
const ENCODING *XmlGetUtf16InternalEncoding(void);
/* Encode one character number into buf; buf must hold at least
XML_UTF8_ENCODE_MAX bytes / XML_UTF16_ENCODE_MAX units respectively.
NOTE(review): presumably the return value is the number of units written --
confirm in the implementation. */
int FASTCALL XmlUtf8Encode(int charNumber, char *buf);
int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf);
/* NOTE(review): presumably the number of bytes the caller must provide as
`mem` to XmlInitUnknownEncoding() -- confirm in the implementation. */
int XmlSizeOfUnknownEncoding(void);
/* Callback type taking caller data and a pointer into the input. XMLCALL is
defined outside this header. */
typedef int (XMLCALL *CONVERTER) (void *userData, const char *p);
/* Build an ENCODING in caller-supplied memory from a table and an optional
converter callback. NOTE(review): table size and the failure return value
are not visible here -- confirm in the implementation. */
ENCODING *
XmlInitUnknownEncoding(void *mem,
int *table,
CONVERTER convert,
void *userData);
/* The *NS functions below have the same signatures as their non-NS
counterparts above. NOTE(review): presumably they are the
namespace-processing variants -- confirm in the implementation. */
int XmlParseXmlDeclNS(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr,
int *standalonePtr);
int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncodingNS(void);
const ENCODING *XmlGetUtf16InternalEncodingNS(void);
ENCODING *
XmlInitUnknownEncodingNS(void *mem,
int *table,
CONVERTER convert,
void *userData);
#ifdef __cplusplus
}
#endif
#endif /* not XmlTok_INCLUDED */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,46 @@
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
/* Byte type codes, numbered consecutively from 0 (BT_NONXML) in declaration
order. These are the values that populate the per-encoding byte
classification tables (the comma-separated BT_* table fragments).
NOTE(review): code elsewhere may depend on the relative order of these
enumerators, so do not reorder without checking the tokenizer. */
enum {
BT_NONXML,
BT_MALFORM,
BT_LT,
BT_AMP,
BT_RSQB,
BT_LEAD2,
BT_LEAD3,
BT_LEAD4,
BT_TRAIL,
BT_CR,
BT_LF,
BT_GT,
BT_QUOT,
BT_APOS,
BT_EQUALS,
BT_QUEST,
BT_EXCL,
BT_SOL,
BT_SEMI,
BT_NUM,
BT_LSQB,
BT_S,
BT_NMSTRT,
BT_COLON,
BT_HEX,
BT_DIGIT,
BT_NAME,
BT_MINUS,
BT_OTHER, /* known not to be a name or name start character */
BT_NONASCII, /* might be a name or name start character */
BT_PERCNT,
BT_LPAR,
BT_RPAR,
BT_AST,
BT_PLUS,
BT_COMMA,
BT_VERBAR
};
#include <stddef.h>

View File

@ -0,0 +1,115 @@
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
/* This file is included! */
#ifdef XML_TOK_NS_C
/* Return the ENCODING embedded in the internal UTF-8 encoding object.
   NS() and ns() are macros supplied by the file that includes this one. */
const ENCODING *
NS(XmlGetUtf8InternalEncoding)(void)
{
  const ENCODING *utf8 = &ns(internal_utf8_encoding).enc;
  return utf8;
}
/* Return the internal UTF-16 encoding that matches the host byte order.
   BYTEORDER == 1234 selects little-endian and 4321 big-endian at compile
   time; any other value falls back to a run-time probe. */
const ENCODING *
NS(XmlGetUtf16InternalEncoding)(void)
{
#if BYTEORDER == 1234
  return &ns(internal_little2_encoding).enc;
#elif BYTEORDER == 4321
  return &ns(internal_big2_encoding).enc;
#else
  /* The first byte of a short holding 1 is non-zero only when the least
     significant byte is stored first, i.e. on a little-endian host. */
  const short probe = 1;
  const char *firstByte = (const char *)&probe;
  if (*firstByte)
    return &ns(internal_little2_encoding).enc;
  return &ns(internal_big2_encoding).enc;
#endif
}
/* Table of concrete encodings. NS(findEncoding) indexes it with the value
returned by getEncodingIndex(), and it is also handed to initScan().
The final slot is the one labelled NO_ENC.
NOTE(review): big2_encoding occupies two consecutive slots; presumably two
distinct encoding indices deliberately share one implementation -- confirm
against the encoding index constants in the including file before
"deduplicating". */
static const ENCODING * const NS(encodings)[] = {
&ns(latin1_encoding).enc,
&ns(ascii_encoding).enc,
&ns(utf8_encoding).enc,
&ns(big2_encoding).enc,
&ns(big2_encoding).enc,
&ns(little2_encoding).enc,
&ns(utf8_encoding).enc /* NO_ENC */
};
/* Prolog-state scanner installed in an INIT_ENCODING: forwards to initScan()
   with this variant's encoding table and XML_PROLOG_STATE. `enc` is really
   the initEnc member of an INIT_ENCODING, hence the cast. */
static int PTRCALL
NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end,
                   const char **nextTokPtr)
{
  const INIT_ENCODING *initEncoding = (const INIT_ENCODING *)enc;
  return initScan(NS(encodings), initEncoding, XML_PROLOG_STATE,
                  ptr, end, nextTokPtr);
}
/* Content-state scanner installed in an INIT_ENCODING: forwards to initScan()
   with this variant's encoding table and XML_CONTENT_STATE. `enc` is really
   the initEnc member of an INIT_ENCODING, hence the cast. */
static int PTRCALL
NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end,
                    const char **nextTokPtr)
{
  const INIT_ENCODING *initEncoding = (const INIT_ENCODING *)enc;
  return initScan(NS(encodings), initEncoding, XML_CONTENT_STATE,
                  ptr, end, nextTokPtr);
}
/* Prepare *p as a bootstrap encoding for the encoding called `name`.
   Returns 0 (leaving *p and *encPtr untouched) when getEncodingIndex()
   reports UNKNOWN_ENC. Otherwise records the index, installs the prolog and
   content init scanners and the position updater, remembers encPtr, points
   *encPtr at p->initEnc, and returns 1. */
int
NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr,
                    const char *name)
{
  int encIndex = getEncodingIndex(name);

  if (encIndex != UNKNOWN_ENC) {
    SET_INIT_ENC_INDEX(p, encIndex);
    p->initEnc.updatePosition = initUpdatePosition;
    p->initEnc.scanners[XML_CONTENT_STATE] = NS(initScanContent);
    p->initEnc.scanners[XML_PROLOG_STATE] = NS(initScanProlog);
    p->encPtr = encPtr;
    *encPtr = &(p->initEnc);
    return 1;
  }
  return 0;
}
/* Map the encoding name stored in [ptr, end) (expressed in encoding `enc`)
   to one of this variant's ENCODING objects.
   The name is first converted to UTF-8 in a local buffer, keeping one byte
   free for the terminator; 0 is returned if the conversion did not consume
   the whole input. A name equal to KW_UTF_16 under streqci() yields `enc`
   itself when enc uses two bytes per character. Otherwise the name is
   looked up with getEncodingIndex(); 0 is returned for UNKNOWN_ENC. */
static const ENCODING *
NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end)
{
#define ENCODING_MAX 128
  char name[ENCODING_MAX];
  char *out = name;
  int encIndex;

  XmlUtf8Convert(enc, &ptr, end, &out, name + ENCODING_MAX - 1);
  if (ptr != end)
    return 0;
  *out = 0;

  if (streqci(name, KW_UTF_16) && enc->minBytesPerChar == 2)
    return enc;

  encIndex = getEncodingIndex(name);
  return (encIndex == UNKNOWN_ENC) ? 0 : NS(encodings)[encIndex];
}
/* Public entry point for parsing an XML/text declaration: delegates to
   doParseXmlDecl(), supplying this variant's NS(findEncoding) as the
   encoding-lookup callback and passing every other argument through
   unchanged. Returns whatever doParseXmlDecl() returns. */
int
NS(XmlParseXmlDecl)(int isGeneralTextEntity,
                    const ENCODING *enc,
                    const char *ptr,
                    const char *end,
                    const char **badPtr,
                    const char **versionPtr,
                    const char **versionEndPtr,
                    const char **encodingName,
                    const ENCODING **encoding,
                    int *standalone)
{
  int result = doParseXmlDecl(NS(findEncoding), isGeneralTextEntity,
                              enc, ptr, end, badPtr,
                              versionPtr, versionEndPtr,
                              encodingName, encoding, standalone);
  return result;
}
#endif /* XML_TOK_NS_C */

View File

@ -0,0 +1,105 @@
/* future_builtins module */
/* This module provides functions that will be builtins in Python 3.0,
but that conflict with builtins that already exist in Python
2.x. */
#include "Python.h"
/* Module docstring, passed to Py_InitModule3() in initfuture_builtins().
   The text is emitted at run time (help(), __doc__), so it must not be
   reflowed. */
PyDoc_STRVAR(module_doc,
"This module provides functions that will be builtins in Python 3.0,\n\
but that conflict with builtins that already exist in Python 2.x.\n\
\n\
Functions:\n\
\n\
ascii(arg) -- Returns the canonical string representation of an object.\n\
filter(pred, iterable) -- Returns an iterator yielding those items of \n\
iterable for which pred(item) is true.\n\
hex(arg) -- Returns the hexadecimal representation of an integer.\n\
map(func, *iterables) -- Returns an iterator that computes the function \n\
using arguments from each of the iterables.\n\
oct(arg) -- Returns the octal representation of an integer.\n\
zip(iter1 [,iter2 [...]]) -- Returns a zip object whose .next() method \n\
returns a tuple where the i-th element comes from the i-th iterable \n\
argument.\n\
\n\
The typical usage of this module is to replace existing builtins in a\n\
module's namespace:\n \n\
from future_builtins import ascii, filter, map, hex, oct, zip\n");
/* hex(number): METH_O entry point. Converts v with PyNumber_ToBase() in
   base 16 and returns that call's result unchanged (including NULL on
   failure). `self` is unused. */
static PyObject *
builtin_hex(PyObject *self, PyObject *v)
{
    const int base = 16;

    return PyNumber_ToBase(v, base);
}
PyDoc_STRVAR(hex_doc,
"hex(number) -> string\n\
\n\
Return the hexadecimal representation of an integer or long integer.");
/* oct(number): METH_O entry point. Converts v with PyNumber_ToBase() in
   base 8 and returns that call's result unchanged (including NULL on
   failure). `self` is unused. */
static PyObject *
builtin_oct(PyObject *self, PyObject *v)
{
    const int base = 8;

    return PyNumber_ToBase(v, base);
}
PyDoc_STRVAR(oct_doc,
"oct(number) -> string\n\
\n\
Return the octal representation of an integer or long integer.");
/* ascii(object): METH_O entry point. Returns the result of PyObject_Repr(v)
   unchanged (including NULL on failure). `self` is unused. */
static PyObject *
builtin_ascii(PyObject *self, PyObject *v)
{
    PyObject *repr = PyObject_Repr(v);

    return repr;
}
PyDoc_STRVAR(ascii_doc,
"ascii(object) -> string\n\
\n\
Return the same as repr(). In Python 3.x, the repr() result will\n\
contain printable characters unescaped, while the ascii() result\n\
will have such characters backslash-escaped.");
/* List of functions exported by this module.
   Each entry is {python name, C function, calling convention, docstring};
   all three functions use METH_O, i.e. they receive exactly one argument
   object. The all-NULL entry terminates the table. */
static PyMethodDef module_functions[] = {
{"hex", builtin_hex, METH_O, hex_doc},
{"oct", builtin_oct, METH_O, oct_doc},
{"ascii", builtin_ascii, METH_O, ascii_doc},
{NULL, NULL} /* Sentinel */
};
/* Initialize this module.

   Creates the "future_builtins" module with hex/oct/ascii, then re-exports
   itertools.imap, ifilter and izip under the names map, filter and zip
   (the leading 'i' is skipped with `*cur_func + 1`).

   On any failure the function simply returns; the pending Python exception,
   if any, is left set for the caller to report. */
PyMODINIT_FUNC
initfuture_builtins(void)
{
    PyObject *m, *itertools, *iter_func;
    char *it_funcs[] = {"imap", "ifilter", "izip", NULL};
    char **cur_func;

    m = Py_InitModule3("future_builtins", module_functions, module_doc);
    if (m == NULL)
        return;
    itertools = PyImport_ImportModuleNoBlock("itertools");
    if (itertools == NULL)
        return;
    /* If anything in the following loop fails, we fall through. */
    for (cur_func = it_funcs; *cur_func; ++cur_func) {
        iter_func = PyObject_GetAttrString(itertools, *cur_func);
        if (iter_func == NULL)
            break;
        if (PyModule_AddObject(m, *cur_func + 1, iter_func) < 0) {
            /* PyModule_AddObject() takes over the reference only when it
               succeeds; on failure we still own iter_func and must
               release it to avoid a leak. */
            Py_DECREF(iter_func);
            break;
        }
    }
    Py_DECREF(itertools);
    /* any other initialization needed */
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,85 @@
#include "Python.h"
#ifndef DONT_HAVE_STDIO_H
#include <stdio.h>
#endif
/* Build date/time strings used by Py_GetBuildInfo(). A value supplied by the
   build (e.g. on the compiler command line) wins; otherwise the compiler's
   __DATE__/__TIME__ are used, with fixed placeholders as a last resort. */
#ifndef DATE
#ifdef __DATE__
#define DATE __DATE__
#else
#define DATE "xx/xx/xx"
#endif
#endif
#ifndef TIME
#ifdef __TIME__
#define TIME __TIME__
#else
#define TIME "xx:xx:xx"
#endif
#endif
/* on unix, SVNVERSION is passed on the command line.
* on Windows, the string is interpolated using
* subwcrev.exe
*/
/* The default below starts with '$'; _Py_svnversion() treats a leading '$'
   as "never interpolated". */
#ifndef SVNVERSION
#define SVNVERSION "$WCRANGE$$WCMODS?M:$"
#endif
/* XXX Only unix build process has been tested */
/* Mercurial identification strings; each defaults to the empty string when
   the build does not provide it. */
#ifndef HGVERSION
#define HGVERSION ""
#endif
#ifndef HGTAG
#define HGTAG ""
#endif
#ifndef HGBRANCH
#define HGBRANCH ""
#endif
/* Return a build description of the form "<id>[:<revision>], <date>, <time>".
   <id> comes from _Py_hgidentifier(), or "default" when that is empty;
   the ":<revision>" part is present only when _Py_hgversion() is non-empty.
   The date and time are truncated to 20 and 9 characters by the format.
   The result lives in a static buffer that is rewritten on every call. */
const char *
Py_GetBuildInfo(void)
{
    static char buildinfo[50 + sizeof(HGVERSION) +
                          ((sizeof(HGTAG) > sizeof(HGBRANCH)) ?
                           sizeof(HGTAG) : sizeof(HGBRANCH))];
    const char *revision = _Py_hgversion();
    const char *sep = "";
    const char *hgid;

    if (revision[0] != '\0')
        sep = ":";
    hgid = _Py_hgidentifier();
    if (hgid[0] == '\0')
        hgid = "default";
    PyOS_snprintf(buildinfo, sizeof(buildinfo),
                  "%s%s%s, %.20s, %.9s",
                  hgid, sep, revision, DATE, TIME);
    return buildinfo;
}
/* Return the Subversion revision string baked into the build, or
   "Unversioned directory" when SVNVERSION still begins with '$', i.e. it
   was neither interpolated nor overridden on the command line. */
const char *
_Py_svnversion(void)
{
    /* the following string can be modified by subwcrev.exe */
    static const char svnversion[] = SVNVERSION;

    if (svnversion[0] == '$')
        return "Unversioned directory";
    /* it was interpolated, or passed on command line */
    return svnversion;
}
/* Return the HGVERSION string supplied at build time ("" by default). */
const char *
_Py_hgversion(void)
{
    const char *version = HGVERSION;

    return version;
}
/* Return the identifier that names this build: HGTAG when it is non-empty
   and not the literal "tip", otherwise HGBRANCH. */
const char *
_Py_hgidentifier(void)
{
    const char *tag = HGTAG;

    if (tag[0] == '\0' || strcmp(tag, "tip") == 0)
        return HGBRANCH;
    return tag;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,394 @@
/*
Copyright (C) 1999, 2000, 2002 Aladdin Enterprises. All rights reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
L. Peter Deutsch
ghost@aladdin.com
*/
/* $Id: md5.c,v 1.6 2002/04/13 19:20:28 lpd Exp $ */
/*
Independent implementation of MD5 (RFC 1321).
This code implements the MD5 Algorithm defined in RFC 1321, whose
text is available at
http://www.ietf.org/rfc/rfc1321.txt
The code is derived from the text of the RFC, including the test suite
(section A.5) but excluding the rest of Appendix A. It does not include
any code or documentation that is identified in the RFC as being
copyrighted.
The original and principal author of md5.c is L. Peter Deutsch
<ghost@aladdin.com>. Other authors are noted in the change history
that follows (in reverse chronological order):
2002-04-13 lpd Clarified derivation from RFC 1321; now handles byte order
either statically or dynamically; added missing #include <string.h>
in library.
2002-03-11 lpd Corrected argument list for main(), and added int return
type, in test program and T value program.
2002-02-21 lpd Added missing #include <stdio.h> in test program.
2000-07-03 lpd Patched to eliminate warnings about "constant is
unsigned in ANSI C, signed in traditional"; made test program
self-checking.
1999-11-04 lpd Edited comments slightly for automatic TOC extraction.
1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5).
1999-05-03 lpd Original version.
*/
#include "md5.h"
#include <string.h>
#include <limits.h>
/*
 * Byte-order selection.  BYTE_ORDER is (re)defined locally from
 * ARCH_IS_BIG_ENDIAN: 1 when that macro is non-zero, -1 when it is zero, and
 * 0 when it is not defined at all, in which case md5_process() tests the
 * byte order at run time.
 */
#undef BYTE_ORDER /* 1 = big-endian, -1 = little-endian, 0 = unknown */
#ifdef ARCH_IS_BIG_ENDIAN
# define BYTE_ORDER (ARCH_IS_BIG_ENDIAN ? 1 : -1)
#else
# define BYTE_ORDER 0
#endif
/*
 * Additive constants T1..T64, one per SET() step in md5_process().
 * Constants whose top bit is set are spelled as T_MASK ^ <bitwise complement>
 * with the intended value kept in the adjacent comment; the change history
 * at the top of this file attributes this to silencing "constant is unsigned
 * in ANSI C, signed in traditional" warnings.
 */
#define T_MASK ((md5_word_t)~0)
#define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87)
#define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9)
#define T3 0x242070db
#define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111)
#define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050)
#define T6 0x4787c62a
#define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec)
#define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe)
#define T9 0x698098d8
#define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850)
#define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e)
#define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841)
#define T13 0x6b901122
#define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c)
#define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71)
#define T16 0x49b40821
#define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d)
#define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf)
#define T19 0x265e5a51
#define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855)
#define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2)
#define T22 0x02441453
#define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e)
#define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437)
#define T25 0x21e1cde6
#define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829)
#define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278)
#define T28 0x455a14ed
#define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa)
#define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07)
#define T31 0x676f02d9
#define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375)
#define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd)
#define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e)
#define T35 0x6d9d6122
#define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3)
#define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb)
#define T38 0x4bdecfa9
#define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f)
#define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f)
#define T41 0x289b7ec6
#define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805)
#define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a)
#define T44 0x04881d05
#define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6)
#define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a)
#define T47 0x1fa27cf8
#define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a)
#define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb)
#define T50 0x432aff97
#define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58)
#define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6)
#define T53 0x655b59c3
#define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d)
#define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82)
#define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e)
#define T57 0x6fa87e4f
#define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f)
#define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb)
#define T60 0x4e0811a1
#define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d)
#define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca)
#define T63 0x2ad7d2bb
#define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e)
/*
 * Mix one 64-byte block into the running digest.
 *
 *   pms   state whose four abcd[] words are read, transformed and then
 *         incremented by the result of the four rounds.
 *   data  exactly 64 bytes of message; interpreted as sixteen 32-bit
 *         little-endian words X[0..15].
 *
 * How X[] is obtained depends on BYTE_ORDER: on little-endian hosts the
 * input is used in place when it is 4-byte aligned and copied otherwise;
 * on big-endian hosts each word is assembled byte by byte.
 */
static void
md5_process(md5_state_t *pms, const md5_byte_t *data /*[64]*/)
{
    md5_word_t
        a = pms->abcd[0], b = pms->abcd[1],
        c = pms->abcd[2], d = pms->abcd[3];
    md5_word_t t;
#if BYTE_ORDER > 0
    /* Define storage only for big-endian CPUs. */
    md5_word_t X[16];
#else
    /* Define storage for little-endian or both types of CPUs. */
    md5_word_t xbuf[16];
    const md5_word_t *X;
#endif

    {
#if BYTE_ORDER == 0
        /*
         * Determine dynamically whether this is a big-endian or
         * little-endian machine, since we can use a more efficient
         * algorithm on the latter.
         */
        static const int w = 1;

        if (*((const md5_byte_t *)&w)) /* dynamic little-endian */
#endif
#if BYTE_ORDER <= 0 /* little-endian */
        {
            /*
             * On little-endian machines, we can process properly aligned
             * data without copying it.  The alignment test converts the
             * pointer to an integer instead of subtracting a null pointer,
             * which is undefined behaviour.
             */
            if (!((size_t)data & 3)) {
                /* data are properly aligned */
                X = (const md5_word_t *)data;
            } else {
                /* not aligned */
                memcpy(xbuf, data, 64);
                X = xbuf;
            }
        }
#endif
#if BYTE_ORDER == 0
        else /* dynamic big-endian */
#endif
#if BYTE_ORDER >= 0 /* big-endian */
        {
            /*
             * On big-endian machines, we must arrange the bytes in the
             * right order.
             */
            const md5_byte_t *xp = data;
            int i;

# if BYTE_ORDER == 0
            X = xbuf; /* (dynamic only) */
# else
# define xbuf X /* (static only) */
# endif
            /*
             * Widen each byte to md5_word_t before shifting: a byte >= 0x80
             * promoted to (signed) int and shifted left by 24 would overflow
             * int, which is undefined behaviour.
             */
            for (i = 0; i < 16; ++i, xp += 4)
                xbuf[i] = (md5_word_t)xp[0] +
                          ((md5_word_t)xp[1] << 8) +
                          ((md5_word_t)xp[2] << 16) +
                          ((md5_word_t)xp[3] << 24);
        }
#endif
    }

#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

    /* Round 1. */
    /* Let [abcd k s i] denote the operation
       a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */
#define F(x, y, z) (((x) & (y)) | (~(x) & (z)))
#define SET(a, b, c, d, k, s, Ti)\
  t = a + F(b,c,d) + X[k] + Ti;\
  a = ROTATE_LEFT(t, s) + b
    /* Do the following 16 operations. */
    SET(a, b, c, d, 0, 7, T1);
    SET(d, a, b, c, 1, 12, T2);
    SET(c, d, a, b, 2, 17, T3);
    SET(b, c, d, a, 3, 22, T4);
    SET(a, b, c, d, 4, 7, T5);
    SET(d, a, b, c, 5, 12, T6);
    SET(c, d, a, b, 6, 17, T7);
    SET(b, c, d, a, 7, 22, T8);
    SET(a, b, c, d, 8, 7, T9);
    SET(d, a, b, c, 9, 12, T10);
    SET(c, d, a, b, 10, 17, T11);
    SET(b, c, d, a, 11, 22, T12);
    SET(a, b, c, d, 12, 7, T13);
    SET(d, a, b, c, 13, 12, T14);
    SET(c, d, a, b, 14, 17, T15);
    SET(b, c, d, a, 15, 22, T16);
#undef SET

    /* Round 2. */
    /* Let [abcd k s i] denote the operation
       a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */
#define G(x, y, z) (((x) & (z)) | ((y) & ~(z)))
#define SET(a, b, c, d, k, s, Ti)\
  t = a + G(b,c,d) + X[k] + Ti;\
  a = ROTATE_LEFT(t, s) + b
    /* Do the following 16 operations. */
    SET(a, b, c, d, 1, 5, T17);
    SET(d, a, b, c, 6, 9, T18);
    SET(c, d, a, b, 11, 14, T19);
    SET(b, c, d, a, 0, 20, T20);
    SET(a, b, c, d, 5, 5, T21);
    SET(d, a, b, c, 10, 9, T22);
    SET(c, d, a, b, 15, 14, T23);
    SET(b, c, d, a, 4, 20, T24);
    SET(a, b, c, d, 9, 5, T25);
    SET(d, a, b, c, 14, 9, T26);
    SET(c, d, a, b, 3, 14, T27);
    SET(b, c, d, a, 8, 20, T28);
    SET(a, b, c, d, 13, 5, T29);
    SET(d, a, b, c, 2, 9, T30);
    SET(c, d, a, b, 7, 14, T31);
    SET(b, c, d, a, 12, 20, T32);
#undef SET

    /* Round 3. */
    /* Let [abcd k s i] denote the operation
       a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define SET(a, b, c, d, k, s, Ti)\
  t = a + H(b,c,d) + X[k] + Ti;\
  a = ROTATE_LEFT(t, s) + b
    /* Do the following 16 operations. */
    SET(a, b, c, d, 5, 4, T33);
    SET(d, a, b, c, 8, 11, T34);
    SET(c, d, a, b, 11, 16, T35);
    SET(b, c, d, a, 14, 23, T36);
    SET(a, b, c, d, 1, 4, T37);
    SET(d, a, b, c, 4, 11, T38);
    SET(c, d, a, b, 7, 16, T39);
    SET(b, c, d, a, 10, 23, T40);
    SET(a, b, c, d, 13, 4, T41);
    SET(d, a, b, c, 0, 11, T42);
    SET(c, d, a, b, 3, 16, T43);
    SET(b, c, d, a, 6, 23, T44);
    SET(a, b, c, d, 9, 4, T45);
    SET(d, a, b, c, 12, 11, T46);
    SET(c, d, a, b, 15, 16, T47);
    SET(b, c, d, a, 2, 23, T48);
#undef SET

    /* Round 4. */
    /* Let [abcd k s i] denote the operation
       a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */
#define I(x, y, z) ((y) ^ ((x) | ~(z)))
#define SET(a, b, c, d, k, s, Ti)\
  t = a + I(b,c,d) + X[k] + Ti;\
  a = ROTATE_LEFT(t, s) + b
    /* Do the following 16 operations. */
    SET(a, b, c, d, 0, 6, T49);
    SET(d, a, b, c, 7, 10, T50);
    SET(c, d, a, b, 14, 15, T51);
    SET(b, c, d, a, 5, 21, T52);
    SET(a, b, c, d, 12, 6, T53);
    SET(d, a, b, c, 3, 10, T54);
    SET(c, d, a, b, 10, 15, T55);
    SET(b, c, d, a, 1, 21, T56);
    SET(a, b, c, d, 8, 6, T57);
    SET(d, a, b, c, 15, 10, T58);
    SET(c, d, a, b, 6, 15, T59);
    SET(b, c, d, a, 13, 21, T60);
    SET(a, b, c, d, 4, 6, T61);
    SET(d, a, b, c, 11, 10, T62);
    SET(c, d, a, b, 2, 15, T63);
    SET(b, c, d, a, 9, 21, T64);
#undef SET

    /* Then perform the following additions. (That is increment each
       of the four registers by the value it had before this block
       was started.) */
    pms->abcd[0] += a;
    pms->abcd[1] += b;
    pms->abcd[2] += c;
    pms->abcd[3] += d;
}
/*
 * Prepare *pms for a new message: zero the bit counter and load the four
 * digest words with their starting constants.
 */
void
md5_init(md5_state_t *pms)
{
    /* Nothing has been hashed yet, so both halves of the length are zero. */
    pms->count[1] = 0;
    pms->count[0] = 0;

    /* Words with the top bit set use the T_MASK ^ complement spelling;
       the intended value is given in the trailing comment. */
    pms->abcd[0] = 0x67452301;
    pms->abcd[1] = T_MASK ^ 0x10325476;     /* 0xefcdab89 */
    pms->abcd[2] = T_MASK ^ 0x67452301;     /* 0x98badcfe */
    pms->abcd[3] = 0x10325476;
}
/*
 * Feed nbytes bytes starting at data into the digest state *pms.
 *
 * The 64-bit message length in pms->count[] (bits, least significant word
 * first) is advanced, any bytes that complete the partially filled
 * pms->buf are hashed, every following whole 64-byte block is hashed
 * straight from the caller's memory, and a trailing partial block is
 * stashed in pms->buf for the next call.
 *
 * Any nbytes value is handled in a single pass: no intermediate sum can
 * wrap, so large inputs need neither splitting nor recursion.
 */
void
md5_append(md5_state_t *pms, const md5_byte_t *data, unsigned int nbytes)
{
    const md5_byte_t *p = data;
    unsigned int left = nbytes;
    /* Number of bytes already waiting in pms->buf (0..63). */
    unsigned int offset = (pms->count[0] >> 3) & 63;
    /* Low word of the bit length; the bits shifted out are added to
       count[1] below via nbytes >> 29. */
    md5_word_t nbits = (md5_word_t)(nbytes << 3);

    if (nbytes == 0)
        return;

    /* Update the message length. */
    pms->count[1] += nbytes >> 29;
    pms->count[0] += nbits;
    if (pms->count[0] < nbits)
        pms->count[1]++;        /* carry out of the low word */

    /* Process an initial partial block. */
    if (offset) {
        /* Compare against the free room rather than computing
           offset + nbytes, which could wrap for very large nbytes. */
        unsigned int room = 64 - offset;
        unsigned int copy = (nbytes > room ? room : nbytes);

        memcpy(pms->buf + offset, p, copy);
        if (offset + copy < 64)
            return;             /* block still incomplete */
        p += copy;
        left -= copy;
        md5_process(pms, pms->buf);
    }

    /* Process full blocks. */
    for (; left >= 64; p += 64, left -= 64)
        md5_process(pms, p);

    /* Process a final partial block. */
    if (left)
        memcpy(pms->buf, p, left);
}
/*
 * Complete the message held in *pms and store the 16-byte digest.
 *
 * The bit length is captured first, then padding (a 0x80 byte followed by
 * zeros) and the 8 length bytes are appended through md5_append(), so *pms
 * is modified by this call.  The digest bytes are the four abcd[] words,
 * each written least significant byte first.
 */
void
md5_finish(md5_state_t *pms, md5_byte_t digest[16])
{
    /* Only the first byte is non-zero; the rest is zero-initialised. */
    static const md5_byte_t padding[64] = { 0x80 };
    md5_byte_t length_bytes[8];
    unsigned int pad_len;
    int k;

    /* Save the length before padding changes the counter. */
    for (k = 0; k < 8; ++k) {
        md5_word_t word = pms->count[k / 4];

        length_bytes[k] = (md5_byte_t)(word >> (8 * (k % 4)));
    }

    /* Pad to 56 bytes mod 64 (always at least one byte). */
    pad_len = ((55 - (pms->count[0] >> 3)) & 63) + 1;
    md5_append(pms, padding, pad_len);

    /* Append the saved length, filling the block to exactly 64 bytes. */
    md5_append(pms, length_bytes, 8);

    for (k = 0; k < 16; ++k) {
        md5_word_t word = pms->abcd[k / 4];

        digest[k] = (md5_byte_t)(word >> (8 * (k % 4)));
    }
}

View File

@ -0,0 +1,91 @@
/*
Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
L. Peter Deutsch
ghost@aladdin.com
*/
/* $Id$ */
/*
Independent implementation of MD5 (RFC 1321).
This code implements the MD5 Algorithm defined in RFC 1321, whose
text is available at
http://www.ietf.org/rfc/rfc1321.txt
The code is derived from the text of the RFC, including the test suite
(section A.5) but excluding the rest of Appendix A. It does not include
any code or documentation that is identified in the RFC as being
copyrighted.
The original and principal author of md5.h is L. Peter Deutsch
<ghost@aladdin.com>. Other authors are noted in the change history
that follows (in reverse chronological order):
2002-04-13 lpd Removed support for non-ANSI compilers; removed
references to Ghostscript; clarified derivation from RFC 1321;
now handles byte order either statically or dynamically.
1999-11-04 lpd Edited comments slightly for automatic TOC extraction.
1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5);
added conditionalization for C++ compilation from Martin
Purschke <purschke@bnl.gov>.
1999-05-03 lpd Original version.
*/
/* Include guard: the declarations below are seen once per translation unit. */
#ifndef md5_INCLUDED
# define md5_INCLUDED
/*
 * This package supports both compile-time and run-time determination of CPU
 * byte order.  If ARCH_IS_BIG_ENDIAN is defined as 0, the code will be
 * compiled to run only on little-endian CPUs; if ARCH_IS_BIG_ENDIAN is
 * defined as non-zero, the code will be compiled to run only on big-endian
 * CPUs; if ARCH_IS_BIG_ENDIAN is not defined, the code will be compiled to
 * run on either big- or little-endian CPUs, but will run slightly less
 * efficiently on either one than if ARCH_IS_BIG_ENDIAN is defined.
 */
typedef unsigned char md5_byte_t; /* 8-bit byte */
/* NOTE(review): declared as plain unsigned int; the "32-bit" label is an
   assumption about the target ABI, not something this header enforces. */
typedef unsigned int md5_word_t; /* 32-bit word */
/* Define the state of the MD5 Algorithm. */
typedef struct md5_state_s {
md5_word_t count[2]; /* message length in bits, lsw first */
md5_word_t abcd[4]; /* digest buffer: the four running digest words */
md5_byte_t buf[64]; /* accumulate block: bytes not yet forming a full block */
} md5_state_t;
#ifdef __cplusplus
extern "C"
{
#endif
/* Initialize the algorithm: reset the length counter and digest words. */
void md5_init(md5_state_t *pms);
/* Append a string to the message: nbytes bytes starting at data. */
void md5_append(md5_state_t *pms, const md5_byte_t *data, unsigned int nbytes);
/* Finish the message and return the digest in digest[0..15].  Padding is
   appended to *pms, so the state is modified by this call. */
void md5_finish(md5_state_t *pms, md5_byte_t digest[16]);
#ifdef __cplusplus
} /* end extern "C" */
#endif
#endif /* md5_INCLUDED */

View File

@ -0,0 +1,339 @@
/* MD5 module */
/* This module provides an interface to the RSA Data Security,
Inc. MD5 Message-Digest Algorithm, described in RFC 1321.
It requires the files md5.c and md5.h (an independent implementation
derived from the text of the RFC), which supply the md5_init(),
md5_append() and md5_finish() routines used below. */
/* MD5 objects */
#include "Python.h"
#include "structmember.h"
#include "md5.h"
/* Instance layout of an md5 object: the standard object header followed by
   the hashing state that md5_init/md5_append/md5_finish operate on. */
typedef struct {
PyObject_HEAD
md5_state_t md5; /* the context holder */
} md5object;
/* Forward declaration; the type object is defined further down. */
static PyTypeObject MD5type;
/* True when v's type is exactly MD5type (pointer comparison on ob_type). */
#define is_md5object(v) ((v)->ob_type == &MD5type)
/* Allocate an md5 object with a freshly initialised hashing state.
   Returns NULL when PyObject_New() fails. */
static md5object *
newmd5object(void)
{
    md5object *obj = PyObject_New(md5object, &MD5type);

    if (obj != NULL)
        md5_init(&obj->md5);    /* actual initialisation */
    return obj;
}
/* MD5 methods */
/* tp_dealloc slot: the object holds no references or external buffers,
   so freeing its own memory is all that is needed. */
static void
md5_dealloc(md5object *md5p)
{
PyObject_Del(md5p);
}
/* MD5 methods-as-attributes */
/* update(arg): hash the bytes of a buffer-compatible argument into self.
   md5_append() takes an unsigned int length, so the data is fed in chunks
   of at most INT_MAX bytes.  Returns None, or NULL if parsing fails. */
static PyObject *
md5_update(md5object *self, PyObject *args)
{
    Py_buffer view;
    const unsigned char *cursor;
    Py_ssize_t remaining;

    if (!PyArg_ParseTuple(args, "s*:update", &view))
        return NULL;

    cursor = (const unsigned char *)view.buf;
    for (remaining = view.len; remaining > 0; ) {
        Py_ssize_t chunk = (remaining > INT_MAX) ? INT_MAX : remaining;

        md5_append(&self->md5, cursor,
                   Py_SAFE_DOWNCAST(chunk, Py_ssize_t, unsigned int));
        cursor += chunk;
        remaining -= chunk;
    }

    PyBuffer_Release(&view);
    Py_RETURN_NONE;
}
PyDoc_STRVAR(update_doc,
"update (arg)\n\
\n\
Update the md5 object with the string arg. Repeated calls are\n\
equivalent to a single call with the concatenation of all the\n\
arguments.");
/* digest(): return the 16 raw digest bytes as a string.  Finalisation runs
   on a copy of the state, so self can keep receiving update() calls. */
static PyObject *
md5_digest(md5object *self)
{
    md5_state_t scratch = self->md5;    /* temporary copy to finalise */
    unsigned char raw[16];

    md5_finish(&scratch, raw);
    return PyString_FromStringAndSize((char *)raw, 16);
}
PyDoc_STRVAR(digest_doc,
"digest() -> string\n\
\n\
Return the digest of the strings passed to the update() method so\n\
far. This is a 16-byte string which may contain non-ASCII characters,\n\
including null bytes.");
/* hexdigest(): return the digest as 32 lowercase hexadecimal characters.
   Like digest(), this finalises a copy and leaves self untouched. */
static PyObject *
md5_hexdigest(md5object *self)
{
    static const char hexchars[] = "0123456789abcdef";
    md5_state_t scratch = self->md5;    /* temporary copy to finalise */
    unsigned char raw[16];
    unsigned char text[32];
    int pos;

    md5_finish(&scratch, raw);

    /* Two output characters per digest byte: high nibble, then low. */
    for (pos = 0; pos < 16; pos++) {
        text[2 * pos] = hexchars[(raw[pos] >> 4) & 0xf];
        text[2 * pos + 1] = hexchars[raw[pos] & 0xf];
    }
    return PyString_FromStringAndSize((char*)text, 32);
}
PyDoc_STRVAR(hexdigest_doc,
"hexdigest() -> string\n\
\n\
Like digest(), but returns the digest as a string of hexadecimal digits.");
/* copy(): return a new md5 object whose hashing state is a snapshot of
   self's.  Returns NULL if the new object cannot be allocated. */
static PyObject *
md5_copy(md5object *self)
{
    md5object *clone = newmd5object();

    if (clone == NULL)
        return NULL;

    clone->md5 = self->md5;     /* struct copy of the whole state */
    return (PyObject *)clone;
}
PyDoc_STRVAR(copy_doc,
"copy() -> md5 object\n\
\n\
Return a copy (``clone'') of the md5 object.");
/* Method table of md5 objects (installed through MD5type's tp_methods):
   update() takes positional arguments, the others take none. */
static PyMethodDef md5_methods[] = {
{"update", (PyCFunction)md5_update, METH_VARARGS, update_doc},
{"digest", (PyCFunction)md5_digest, METH_NOARGS, digest_doc},
{"hexdigest", (PyCFunction)md5_hexdigest, METH_NOARGS, hexdigest_doc},
{"copy", (PyCFunction)md5_copy, METH_NOARGS, copy_doc},
{NULL, NULL} /* sentinel */
};
/* Getter for the block_size attribute: always the integer 64
   (the same size as the md5_state_t buf[] block). */
static PyObject *
md5_get_block_size(PyObject *self, void *closure)
{
return PyInt_FromLong(64);
}
/* Getter shared by the digest_size and digestsize attributes: always the
   integer 16, the number of bytes md5_finish() writes. */
static PyObject *
md5_get_digest_size(PyObject *self, void *closure)
{
return PyInt_FromLong(16);
}
/* Getter for the name attribute: always the 3-character string "MD5". */
static PyObject *
md5_get_name(PyObject *self, void *closure)
{
return PyString_FromStringAndSize("MD5", 3);
}
/* Attribute table of md5 objects (installed through MD5type's tp_getset).
   Every entry has a getter but a NULL setter, docstring and closure. */
static PyGetSetDef md5_getseters[] = {
{"digest_size",
(getter)md5_get_digest_size, NULL,
NULL,
NULL},
{"block_size",
(getter)md5_get_block_size, NULL,
NULL,
NULL},
{"name",
(getter)md5_get_name, NULL,
NULL,
NULL},
/* the old md5 and sha modules support 'digest_size' as in PEP 247.
* the old sha module also supported 'digestsize'. ugh. */
/* Hence this alias: it reuses the digest_size getter. */
{"digestsize",
(getter)md5_get_digest_size, NULL,
NULL,
NULL},
{NULL} /* Sentinel */
};
/* Docstring of the _md5 module; passed to Py_InitModule3() in init_md5(). */
PyDoc_STRVAR(module_doc,
"This module implements the interface to RSA's MD5 message digest\n\
algorithm (see also Internet RFC 1321). Its use is quite\n\
straightforward: use the new() to create an md5 object. You can now\n\
feed this object with arbitrary strings using the update() method, and\n\
at any point you can ask it for the digest (a strong kind of 128-bit\n\
checksum, a.k.a. ``fingerprint'') of the concatenation of the strings\n\
fed to it so far using the digest() method.\n\
\n\
Functions:\n\
\n\
new([arg]) -- return a new md5 object, initialized with arg if provided\n\
md5([arg]) -- DEPRECATED, same as new, but for compatibility\n\
\n\
Special Objects:\n\
\n\
MD5Type -- type object for md5 objects");
/* Docstring of the md5 type; referenced from MD5type's tp_doc slot. */
PyDoc_STRVAR(md5type_doc,
"An md5 represents the object used to calculate the MD5 checksum of a\n\
string of information.\n\
\n\
Methods:\n\
\n\
update() -- updates the current digest with an additional string\n\
digest() -- return the current digest value\n\
hexdigest() -- return the current digest as a string of hexadecimal digits\n\
copy() -- return a copy of the current md5 object");
/* Type object for md5 objects.  Only deallocation, the docstring, the
   method table and the getset table are provided; every other slot is 0.
   The metatype is left NULL here and assigned in init_md5(). */
static PyTypeObject MD5type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_md5.md5", /*tp_name*/
sizeof(md5object), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
(destructor)md5_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT, /*tp_flags*/
md5type_doc, /*tp_doc*/
0, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
md5_methods, /*tp_methods*/
0, /*tp_members*/
md5_getseters, /*tp_getset*/
};
/* MD5 functions */
/* new([arg]): create an md5 object and, when arg is given, hash its bytes
   into it in chunks of at most INT_MAX bytes.  The view starts zeroed so
   that releasing it is safe when no argument was supplied. */
static PyObject *
MD5_new(PyObject *self, PyObject *args)
{
    Py_buffer view = { 0 };
    md5object *result;
    const unsigned char *cursor;
    Py_ssize_t remaining;

    if (!PyArg_ParseTuple(args, "|s*:new", &view))
        return NULL;

    result = newmd5object();
    if (result == NULL) {
        PyBuffer_Release(&view);
        return NULL;
    }

    cursor = (const unsigned char *)view.buf;
    for (remaining = view.len; remaining > 0; ) {
        Py_ssize_t chunk = (remaining > INT_MAX) ? INT_MAX : remaining;

        md5_append(&result->md5, cursor,
                   Py_SAFE_DOWNCAST(chunk, Py_ssize_t, unsigned int));
        cursor += chunk;
        remaining -= chunk;
    }

    PyBuffer_Release(&view);
    return (PyObject *)result;
}
PyDoc_STRVAR(new_doc,
"new([arg]) -> md5 object\n\
\n\
Return a new md5 object. If arg is present, the method call update(arg)\n\
is made.");
/* List of functions exported by this module: only new() is registered
   here, taking positional arguments. */
static PyMethodDef md5_functions[] = {
{"new", (PyCFunction)MD5_new, METH_VARARGS, new_doc},
{NULL, NULL} /* Sentinel */
};
/* Initialize this module. */
PyMODINIT_FUNC
init_md5(void)
{
PyObject *m, *d;
Py_TYPE(&MD5type) = &PyType_Type;
if (PyType_Ready(&MD5type) < 0)
return;
m = Py_InitModule3("_md5", md5_functions, module_doc);
if (m == NULL)
return;
d = PyModule_GetDict(m);
PyDict_SetItemString(d, "MD5Type", (PyObject *)&MD5type);
PyModule_AddIntConstant(m, "digest_size", 16);
/* No need to check the error here, the caller will do that */
}

View File

@ -0,0 +1,930 @@
#include "Python.h"
/* Docstring text for the operator module. */
PyDoc_STRVAR(operator_doc,
"Operator interface.\n\
\n\
This module exports a set of functions implemented in C corresponding\n\
to the intrinsic operators of Python. For example, operator.add(x, y)\n\
is equivalent to the expression x+y. The function names are those\n\
used for special methods; variants without leading and trailing\n\
'__' are also provided for convenience.");
/*
 * Wrapper generators.  Each macro below expands to a complete static
 * function named OP that forwards to the C-API call AOP.  In every wrapper
 * the first parameter (s) is unused.
 */
/* spam1: one-argument wrapper; a1 is passed straight to AOP and AOP's
   result is returned unchanged. */
#define spam1(OP,AOP) static PyObject *OP(PyObject *s, PyObject *a1) { \
return AOP(a1); }
/* spam2: unpack exactly two objects from the tuple a, return AOP(a1, a2). */
#define spam2(OP,AOP) static PyObject *OP(PyObject *s, PyObject *a) { \
PyObject *a1, *a2; \
if(! PyArg_UnpackTuple(a,#OP,2,2,&a1,&a2)) return NULL; \
return AOP(a1,a2); }
/* spamoi: parse (object, C int) from a, return AOP(a1, a2). */
#define spamoi(OP,AOP) static PyObject *OP(PyObject *s, PyObject *a) { \
PyObject *a1; int a2; \
if(! PyArg_ParseTuple(a,"Oi:" #OP,&a1,&a2)) return NULL; \
return AOP(a1,a2); }
/* spam2n: two objects; AOP reports failure as -1 (-> NULL), otherwise the
   wrapper returns None. */
#define spam2n(OP,AOP) static PyObject *OP(PyObject *s, PyObject *a) { \
PyObject *a1, *a2; \
if(! PyArg_UnpackTuple(a,#OP,2,2,&a1,&a2)) return NULL; \
if(-1 == AOP(a1,a2)) return NULL; \
Py_INCREF(Py_None); \
return Py_None; }
/* spam3n: same as spam2n but with three objects. */
#define spam3n(OP,AOP) static PyObject *OP(PyObject *s, PyObject *a) { \
PyObject *a1, *a2, *a3; \
if(! PyArg_UnpackTuple(a,#OP,3,3,&a1,&a2,&a3)) return NULL; \
if(-1 == AOP(a1,a2,a3)) return NULL; \
Py_INCREF(Py_None); \
return Py_None; }
/* spami: one-argument predicate; -1 from AOP means error, any other value
   is converted with PyBool_FromLong(). */
#define spami(OP,AOP) static PyObject *OP(PyObject *s, PyObject *a1) { \
long r; \
if(-1 == (r=AOP(a1))) return NULL; \
return PyBool_FromLong(r); }
/* spami2: two objects; the long result is returned as a Python int. */
#define spami2(OP,AOP) static PyObject *OP(PyObject *s, PyObject *a) { \
PyObject *a1, *a2; long r; \
if(! PyArg_UnpackTuple(a,#OP,2,2,&a1,&a2)) return NULL; \
if(-1 == (r=AOP(a1,a2))) return NULL; \
return PyInt_FromLong(r); }
/* spamn2: two objects; the Py_ssize_t result is returned as a Python int. */
#define spamn2(OP,AOP) static PyObject *OP(PyObject *s, PyObject *a) { \
PyObject *a1, *a2; Py_ssize_t r; \
if(! PyArg_UnpackTuple(a,#OP,2,2,&a1,&a2)) return NULL; \
if(-1 == (r=AOP(a1,a2))) return NULL; \
return PyInt_FromSsize_t(r); }
/* spami2b: two objects; the result is returned as a Python bool. */
#define spami2b(OP,AOP) static PyObject *OP(PyObject *s, PyObject *a) { \
PyObject *a1, *a2; long r; \
if(! PyArg_UnpackTuple(a,#OP,2,2,&a1,&a2)) return NULL; \
if(-1 == (r=AOP(a1,a2))) return NULL; \
return PyBool_FromLong(r); }
/* spamrc: two objects compared with PyObject_RichCompare() using the
   comparison opcode A. */
#define spamrc(OP,A) static PyObject *OP(PyObject *s, PyObject *a) { \
PyObject *a1, *a2; \
if(! PyArg_UnpackTuple(a,#OP,2,2,&a1,&a2)) return NULL; \
return PyObject_RichCompare(a1,a2,A); }
/* Deprecated operators that need warnings. */
/* Backend for operator.isCallable(): issue the Py3k deprecation warning,
   then report PyCallable_Check(x).  Returns -1 if the warning call fails. */
static int
op_isCallable(PyObject *x)
{
    int warned = PyErr_WarnPy3k("operator.isCallable() is not supported in 3.x. "
                                "Use hasattr(obj, '__call__').", 1);

    if (warned < 0)
        return -1;
    return PyCallable_Check(x);
}
/* Backend for operator.sequenceIncludes(): issue the Py3k deprecation
   warning, then defer to PySequence_Contains(seq, ob).  Returns -1 if the
   warning call fails. */
static int
op_sequenceIncludes(PyObject *seq, PyObject* ob)
{
    int warned = PyErr_WarnPy3k("operator.sequenceIncludes() is not supported "
                                "in 3.x. Use operator.contains().", 1);

    if (warned < 0)
        return -1;
    return PySequence_Contains(seq, ob);
}
/* Instantiate the wrapper functions: each line defines one static function
   (first argument) that forwards to the named call (second argument). */
/* Type predicates and truth value. */
spami(isCallable , op_isCallable)
spami(isNumberType , PyNumber_Check)
spami(truth , PyObject_IsTrue)
/* Binary and unary numeric operators. */
spam2(op_add , PyNumber_Add)
spam2(op_sub , PyNumber_Subtract)
spam2(op_mul , PyNumber_Multiply)
spam2(op_div , PyNumber_Divide)
spam2(op_floordiv , PyNumber_FloorDivide)
spam2(op_truediv , PyNumber_TrueDivide)
spam2(op_mod , PyNumber_Remainder)
spam1(op_neg , PyNumber_Negative)
spam1(op_pos , PyNumber_Positive)
spam1(op_abs , PyNumber_Absolute)
/* inv and invert both map to PyNumber_Invert. */
spam1(op_inv , PyNumber_Invert)
spam1(op_invert , PyNumber_Invert)
spam2(op_lshift , PyNumber_Lshift)
spam2(op_rshift , PyNumber_Rshift)
spami(op_not_ , PyObject_Not)
spam2(op_and_ , PyNumber_And)
spam2(op_xor , PyNumber_Xor)
spam2(op_or_ , PyNumber_Or)
/* In-place numeric operators. */
spam2(op_iadd , PyNumber_InPlaceAdd)
spam2(op_isub , PyNumber_InPlaceSubtract)
spam2(op_imul , PyNumber_InPlaceMultiply)
spam2(op_idiv , PyNumber_InPlaceDivide)
spam2(op_ifloordiv , PyNumber_InPlaceFloorDivide)
spam2(op_itruediv , PyNumber_InPlaceTrueDivide)
spam2(op_imod , PyNumber_InPlaceRemainder)
spam2(op_ilshift , PyNumber_InPlaceLshift)
spam2(op_irshift , PyNumber_InPlaceRshift)
spam2(op_iand , PyNumber_InPlaceAnd)
spam2(op_ixor , PyNumber_InPlaceXor)
spam2(op_ior , PyNumber_InPlaceOr)
/* Sequence operations. */
spami(isSequenceType , PySequence_Check)
spam2(op_concat , PySequence_Concat)
spamoi(op_repeat , PySequence_Repeat)
spam2(op_iconcat , PySequence_InPlaceConcat)
spamoi(op_irepeat , PySequence_InPlaceRepeat)
spami2b(op_contains , PySequence_Contains)
spami2b(sequenceIncludes, op_sequenceIncludes)
spamn2(indexOf , PySequence_Index)
spamn2(countOf , PySequence_Count)
/* Mapping / item access. */
spami(isMappingType , PyMapping_Check)
spam2(op_getitem , PyObject_GetItem)
spam2n(op_delitem , PyObject_DelItem)
spam3n(op_setitem , PyObject_SetItem)
/* Rich comparisons. */
spamrc(op_lt , Py_LT)
spamrc(op_le , Py_LE)
spamrc(op_eq , Py_EQ)
spamrc(op_ne , Py_NE)
spamrc(op_gt , Py_GT)
spamrc(op_ge , Py_GE)
/* pow(a, b): unpack exactly two arguments and return
   PyNumber_Power(a, b, None); NULL if unpacking fails. */
static PyObject*
op_pow(PyObject *s, PyObject *a)
{
    PyObject *base, *exponent;

    if (!PyArg_UnpackTuple(a,"pow", 2, 2, &base, &exponent))
        return NULL;
    return PyNumber_Power(base, exponent, Py_None);
}
/* ipow(a, b): unpack exactly two arguments and return
   PyNumber_InPlacePower(a, b, None); NULL if unpacking fails. */
static PyObject*
op_ipow(PyObject *s, PyObject *a)
{
    PyObject *base, *exponent;

    if (!PyArg_UnpackTuple(a,"ipow", 2, 2, &base, &exponent))
        return NULL;
    return PyNumber_InPlacePower(base, exponent, Py_None);
}
/* index(a): forwards the single argument directly to PyNumber_Index() and
   returns its result unchanged; s is unused. */
static PyObject *
op_index(PyObject *s, PyObject *a)
{
return PyNumber_Index(a);
}
/* is_(a, b): True when both arguments are the very same object (pointer
   equality), False otherwise; NULL if unpacking fails. */
static PyObject*
is_(PyObject *s, PyObject *a)
{
    PyObject *lhs, *rhs;

    if (!PyArg_UnpackTuple(a,"is_", 2, 2, &lhs, &rhs))
        return NULL;
    /* PyBool_FromLong hands back a new reference to Py_True / Py_False. */
    return PyBool_FromLong(lhs == rhs);
}
/* is_not(a, b): True when the two arguments are different objects (pointer
   inequality), False otherwise; NULL if unpacking fails. */
static PyObject*
is_not(PyObject *s, PyObject *a)
{
    PyObject *lhs, *rhs;

    if (!PyArg_UnpackTuple(a,"is_not", 2, 2, &lhs, &rhs))
        return NULL;
    /* PyBool_FromLong hands back a new reference to Py_True / Py_False. */
    return PyBool_FromLong(lhs != rhs);
}
/* getslice(a, b, c): parse (object, ssize, ssize) and return
   PySequence_GetSlice(a, b, c); NULL if parsing fails. */
static PyObject*
op_getslice(PyObject *s, PyObject *a)
{
    PyObject *seq;
    Py_ssize_t start, stop;
    int parsed = PyArg_ParseTuple(a, "Onn:getslice", &seq, &start, &stop);

    if (!parsed)
        return NULL;
    return PySequence_GetSlice(seq, start, stop);
}
/* setslice(a, b, c, d): parse (object, ssize, ssize, object) and call
   PySequence_SetSlice(a, b, c, d).  Returns None on success, NULL when
   parsing fails or the call reports -1. */
static PyObject*
op_setslice(PyObject *s, PyObject *a)
{
    PyObject *seq, *value;
    Py_ssize_t start, stop;
    int parsed = PyArg_ParseTuple(a, "OnnO:setslice",
                                  &seq, &start, &stop, &value);

    if (!parsed)
        return NULL;
    if (PySequence_SetSlice(seq, start, stop, value) == -1)
        return NULL;
    Py_RETURN_NONE;
}
/* delslice(a, b, c): parse (object, ssize, ssize) and call
   PySequence_DelSlice(a, b, c).  Returns None on success, NULL when
   parsing fails or the call reports -1. */
static PyObject*
op_delslice(PyObject *s, PyObject *a)
{
    PyObject *seq;
    Py_ssize_t start, stop;
    int parsed = PyArg_ParseTuple(a, "Onn:delslice", &seq, &start, &stop);

    if (!parsed)
        return NULL;
    if (PySequence_DelSlice(seq, start, stop) == -1)
        return NULL;
    Py_RETURN_NONE;
}
/*
 * Switch the spam* names from "define a wrapper function" to "emit a
 * PyMethodDef entry".  Any earlier definition is dropped first; each of the
 * four names redefined below is undefined exactly once.
 */
#undef spam1
#undef spam2
#undef spam1o
#undef spam2o
/* spam1: one METH_VARARGS entry whose Python name and C function are OP. */
#define spam1(OP,DOC) {#OP, OP, METH_VARARGS, PyDoc_STR(DOC)},
/* spam2: two METH_VARARGS entries (names OP and ALTOP), both bound to the
   C function op_OP. */
#define spam2(OP,ALTOP,DOC) {#OP, op_##OP, METH_VARARGS, PyDoc_STR(DOC)}, \
{#ALTOP, op_##OP, METH_VARARGS, PyDoc_STR(DOC)},
/* spam1o / spam2o: same as spam1 / spam2 but registered as METH_O. */
#define spam1o(OP,DOC) {#OP, OP, METH_O, PyDoc_STR(DOC)},
#define spam2o(OP,ALTOP,DOC) {#OP, op_##OP, METH_O, PyDoc_STR(DOC)}, \
{#ALTOP, op_##OP, METH_O, PyDoc_STR(DOC)},
/* compare_digest **********************************************************/
/*
 * timing safe compare
 *
 * Returns 1 if the strings are equal, 0 otherwise.
 * In case of len(a) != len(b) the function tries to keep the timing
 * dependent on the length of b. CPU cache locality may still alter timing
 * a bit.
 *
 *   a, b          byte buffers to compare
 *   len_a, len_b  their lengths in bytes
 */
static int
_tscmp(const unsigned char *a, const unsigned char *b,
Py_ssize_t len_a, Py_ssize_t len_b)
{
/* The volatile type declarations make sure that the compiler has no
* chance to optimize and fold the code in any way that may change
* the timing.
*/
volatile Py_ssize_t length;
volatile const unsigned char *left;
volatile const unsigned char *right;
Py_ssize_t i;
unsigned char result;
/* loop count depends on length of b */
length = len_b;
left = NULL;
right = b;
/* don't use else here to keep the amount of CPU instructions constant,
* volatile forces re-evaluation
* */
/* Exactly one of the two tests below holds, so left and result are
   always assigned before the loop. */
if (len_a == length) {
left = *((volatile const unsigned char**)&a);
result = 0;
}
if (len_a != length) {
/* Length mismatch: compare b with itself so the loop still runs len_b
   times; result starts at 1, so the final answer is "not equal". */
left = b;
result = 1;
}
/* Accumulate the XOR of every byte pair; no early exit on a mismatch. */
for (i=0; i < length; i++) {
result |= *left++ ^ *right++;
}
return (result == 0);
}
PyDoc_STRVAR(compare_digest__doc__,
"compare_digest(a, b) -> bool\n"
"\n"
"Return 'a == b'. This function uses an approach designed to prevent\n"
"timing analysis, making it appropriate for cryptography.\n"
"a and b must both be of the same type: either str (ASCII only),\n"
"or any type that supports the buffer protocol (e.g. bytes).\n"
"\n"
"Note: If a and b are of different lengths, or if an error occurs,\n"
"a timing attack could theoretically reveal information about the\n"
"types and lengths of a and b--but not their values.\n");
/* compare_digest(a, b): compare two values with _tscmp().
 *
 * Two unicode objects are compared through their raw data buffers.
 * Anything else goes through the buffer interface: a TypeError is raised
 * here only when neither argument supports it, otherwise a failure from
 * PyObject_GetBuffer() is passed on.  Buffers with more than one dimension
 * raise BufferError.  Every view that was obtained is released before
 * returning.
 */
static PyObject*
compare_digest(PyObject *self, PyObject *args)
{
    PyObject *a, *b;
    Py_buffer view_a;
    Py_buffer view_b;
    int equal;

    if (!PyArg_ParseTuple(args, "OO:compare_digest", &a, &b))
        return NULL;

    /* Unicode string */
    if (PyUnicode_Check(a) && PyUnicode_Check(b)) {
        equal = _tscmp((const unsigned char *)PyUnicode_AS_DATA(a),
                       (const unsigned char *)PyUnicode_AS_DATA(b),
                       PyUnicode_GET_DATA_SIZE(a),
                       PyUnicode_GET_DATA_SIZE(b));
        return PyBool_FromLong(equal);
    }

    /* fallback to buffer interface for bytes, bytesarray and other */
    if (!PyObject_CheckBuffer(a) && !PyObject_CheckBuffer(b)) {
        PyErr_Format(PyExc_TypeError,
                     "unsupported operand types(s) or combination of types: "
                     "'%.100s' and '%.100s'",
                     Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
        return NULL;
    }

    if (PyObject_GetBuffer(a, &view_a, PyBUF_SIMPLE) == -1)
        return NULL;
    if (view_a.ndim > 1) {
        PyErr_SetString(PyExc_BufferError,
                        "Buffer must be single dimension");
        PyBuffer_Release(&view_a);
        return NULL;
    }

    if (PyObject_GetBuffer(b, &view_b, PyBUF_SIMPLE) == -1) {
        PyBuffer_Release(&view_a);
        return NULL;
    }
    if (view_b.ndim > 1) {
        PyErr_SetString(PyExc_BufferError,
                        "Buffer must be single dimension");
        PyBuffer_Release(&view_a);
        PyBuffer_Release(&view_b);
        return NULL;
    }

    equal = _tscmp((const unsigned char*)view_a.buf,
                   (const unsigned char*)view_b.buf,
                   view_a.len,
                   view_b.len);

    PyBuffer_Release(&view_a);
    PyBuffer_Release(&view_b);
    return PyBool_FromLong(equal);
}
/* Method table handed to Py_InitModule4() by initoperator().
 *
 * Most rows are produced by the spam1/spam2/spam1o/spam2o helper macros,
 * which are defined outside this part of the file (NOTE(review): they
 * presumably expand to one or two PyMethodDef initialisers each, the
 * spam2 forms adding the __dunder__ alias -- confirm against the macro
 * definitions).  The only hand-written row is _compare_digest, and the
 * table ends with the usual {NULL, NULL} sentinel.
 */
static struct PyMethodDef operator_methods[] = {
spam1o(isCallable,
"isCallable(a) -- Same as callable(a).")
spam1o(isNumberType,
"isNumberType(a) -- Return True if a has a numeric type, False otherwise.")
spam1o(isSequenceType,
"isSequenceType(a) -- Return True if a has a sequence type, False otherwise.")
spam1o(truth,
"truth(a) -- Return True if a is true, False otherwise.")
spam2(contains,__contains__,
"contains(a, b) -- Same as b in a (note reversed operands).")
spam1(sequenceIncludes,
"sequenceIncludes(a, b) -- Same as b in a (note reversed operands; deprecated).")
spam1(indexOf,
"indexOf(a, b) -- Return the first index of b in a.")
spam1(countOf,
"countOf(a, b) -- Return the number of times b occurs in a.")
spam1o(isMappingType,
"isMappingType(a) -- Return True if a has a mapping type, False otherwise.")
spam1(is_, "is_(a, b) -- Same as a is b.")
spam1(is_not, "is_not(a, b) -- Same as a is not b.")
spam2o(index, __index__, "index(a) -- Same as a.__index__()")
spam2(add,__add__, "add(a, b) -- Same as a + b.")
spam2(sub,__sub__, "sub(a, b) -- Same as a - b.")
spam2(mul,__mul__, "mul(a, b) -- Same as a * b.")
spam2(div,__div__, "div(a, b) -- Same as a / b when __future__.division is not in effect.")
spam2(floordiv,__floordiv__, "floordiv(a, b) -- Same as a // b.")
spam2(truediv,__truediv__, "truediv(a, b) -- Same as a / b when __future__.division is in effect.")
spam2(mod,__mod__, "mod(a, b) -- Same as a % b.")
spam2o(neg,__neg__, "neg(a) -- Same as -a.")
spam2o(pos,__pos__, "pos(a) -- Same as +a.")
spam2o(abs,__abs__, "abs(a) -- Same as abs(a).")
spam2o(inv,__inv__, "inv(a) -- Same as ~a.")
spam2o(invert,__invert__, "invert(a) -- Same as ~a.")
spam2(lshift,__lshift__, "lshift(a, b) -- Same as a << b.")
spam2(rshift,__rshift__, "rshift(a, b) -- Same as a >> b.")
spam2o(not_,__not__, "not_(a) -- Same as not a.")
spam2(and_,__and__, "and_(a, b) -- Same as a & b.")
spam2(xor,__xor__, "xor(a, b) -- Same as a ^ b.")
spam2(or_,__or__, "or_(a, b) -- Same as a | b.")
spam2(iadd,__iadd__, "a = iadd(a, b) -- Same as a += b.")
spam2(isub,__isub__, "a = isub(a, b) -- Same as a -= b.")
spam2(imul,__imul__, "a = imul(a, b) -- Same as a *= b.")
spam2(idiv,__idiv__, "a = idiv(a, b) -- Same as a /= b when __future__.division is not in effect.")
spam2(ifloordiv,__ifloordiv__, "a = ifloordiv(a, b) -- Same as a //= b.")
spam2(itruediv,__itruediv__, "a = itruediv(a, b) -- Same as a /= b when __future__.division is in effect.")
spam2(imod,__imod__, "a = imod(a, b) -- Same as a %= b.")
spam2(ilshift,__ilshift__, "a = ilshift(a, b) -- Same as a <<= b.")
spam2(irshift,__irshift__, "a = irshift(a, b) -- Same as a >>= b.")
spam2(iand,__iand__, "a = iand(a, b) -- Same as a &= b.")
spam2(ixor,__ixor__, "a = ixor(a, b) -- Same as a ^= b.")
spam2(ior,__ior__, "a = ior(a, b) -- Same as a |= b.")
spam2(concat,__concat__,
"concat(a, b) -- Same as a + b, for a and b sequences.")
spam2(repeat,__repeat__,
"repeat(a, b) -- Return a * b, where a is a sequence, and b is an integer.")
spam2(iconcat,__iconcat__,
"a = iconcat(a, b) -- Same as a += b, for a and b sequences.")
spam2(irepeat,__irepeat__,
"a = irepeat(a, b) -- Same as a *= b, where a is a sequence, and b is an integer.")
spam2(getitem,__getitem__,
"getitem(a, b) -- Same as a[b].")
spam2(setitem,__setitem__,
"setitem(a, b, c) -- Same as a[b] = c.")
spam2(delitem,__delitem__,
"delitem(a, b) -- Same as del a[b].")
spam2(pow,__pow__, "pow(a, b) -- Same as a ** b.")
spam2(ipow,__ipow__, "a = ipow(a, b) -- Same as a **= b.")
spam2(getslice,__getslice__,
"getslice(a, b, c) -- Same as a[b:c].")
spam2(setslice,__setslice__,
"setslice(a, b, c, d) -- Same as a[b:c] = d.")
spam2(delslice,__delslice__,
"delslice(a, b, c) -- Same as del a[b:c].")
spam2(lt,__lt__, "lt(a, b) -- Same as a<b.")
spam2(le,__le__, "le(a, b) -- Same as a<=b.")
spam2(eq,__eq__, "eq(a, b) -- Same as a==b.")
spam2(ne,__ne__, "ne(a, b) -- Same as a!=b.")
spam2(gt,__gt__, "gt(a, b) -- Same as a>b.")
spam2(ge,__ge__, "ge(a, b) -- Same as a>=b.")
/* Exposed under a leading underscore; takes two positional arguments. */
{"_compare_digest", (PyCFunction)compare_digest, METH_VARARGS,
compare_digest__doc__},
{NULL, NULL} /* sentinel */
};
/* itemgetter object **********************************************************/
/* Instance layout of operator.itemgetter. */
typedef struct {
PyObject_HEAD
/* Number of positional arguments given to the constructor. */
Py_ssize_t nitems;
/* Owned reference: the single key when nitems == 1, otherwise the
   constructor's whole argument tuple of keys. */
PyObject *item;
} itemgetterobject;
/* Forward declaration; the constructor needs the type object, which is
   defined after the methods it refers to. */
static PyTypeObject itemgetter_type;
/* tp_new for operator.itemgetter.
 *
 * Accepts one or more positional keys and no keyword arguments.  With a
 * single key the key itself is stored; with several, the argument tuple
 * is stored as-is.  Returns a new GC-tracked object or NULL on error.
 */
static PyObject *
itemgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    itemgetterobject *getter;
    PyObject *key;
    Py_ssize_t count;

    if (!_PyArg_NoKeywords("itemgetter()", kwds))
        return NULL;

    count = PyTuple_GET_SIZE(args);
    if (count > 1) {
        /* Several keys: keep the whole tuple. */
        key = args;
    }
    else if (!PyArg_UnpackTuple(args, "itemgetter", 1, 1, &key)) {
        /* Zero arguments: UnpackTuple has raised the TypeError. */
        return NULL;
    }

    /* create itemgetterobject structure */
    getter = PyObject_GC_New(itemgetterobject, &itemgetter_type);
    if (getter == NULL)
        return NULL;

    getter->nitems = count;
    getter->item = key;
    Py_INCREF(key);

    PyObject_GC_Track(getter);
    return (PyObject *)getter;
}
/* tp_dealloc for operator.itemgetter: stop GC tracking first, then drop
   the owned key reference and free the object's memory. */
static void
itemgetter_dealloc(itemgetterobject *ig)
{
PyObject_GC_UnTrack(ig);
Py_XDECREF(ig->item);
PyObject_GC_Del(ig);
}
/* tp_traverse for operator.itemgetter: reports the single owned
   reference (ig->item) to the cyclic garbage collector. */
static int
itemgetter_traverse(itemgetterobject *ig, visitproc visit, void *arg)
{
Py_VISIT(ig->item);
return 0;
}
/* tp_call for operator.itemgetter.
 *
 * Takes exactly one positional operand.  Returns operand[key] for a
 * single-key getter, otherwise a new tuple holding operand[k] for each
 * stored key, in order.  Returns NULL with an exception set on failure.
 * The kw argument is not inspected.
 */
static PyObject *
itemgetter_call(itemgetterobject *ig, PyObject *args, PyObject *kw)
{
    PyObject *obj, *values;
    Py_ssize_t nitems = ig->nitems;
    Py_ssize_t pos;

    if (!PyArg_UnpackTuple(args, "itemgetter", 1, 1, &obj))
        return NULL;

    /* Single key: no tuple wrapping. */
    if (nitems == 1)
        return PyObject_GetItem(obj, ig->item);

    assert(PyTuple_Check(ig->item));
    assert(PyTuple_GET_SIZE(ig->item) == nitems);

    values = PyTuple_New(nitems);
    if (values == NULL)
        return NULL;

    for (pos = 0; pos < nitems; pos++) {
        PyObject *fetched = PyObject_GetItem(obj,
                                             PyTuple_GET_ITEM(ig->item, pos));
        if (fetched == NULL) {
            /* Releases the partially filled tuple and its contents. */
            Py_DECREF(values);
            return NULL;
        }
        /* SET_ITEM takes over the new reference. */
        PyTuple_SET_ITEM(values, pos, fetched);
    }
    return values;
}
/* Docstring installed as operator.itemgetter.__doc__ via tp_doc below. */
PyDoc_STRVAR(itemgetter_doc,
"itemgetter(item, ...) --> itemgetter object\n\
\n\
Return a callable object that fetches the given item(s) from its operand.\n\
After f = itemgetter(2), the call f(r) returns r[2].\n\
After g = itemgetter(2, 5, 3), the call g(r) returns (r[2], r[5], r[3])");
/* Type object for operator.itemgetter.  Slots are positional: the type
   is GC-enabled, callable (tp_call), constructed through tp_new, and
   leaves every other optional slot empty. */
static PyTypeObject itemgetter_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"operator.itemgetter", /* tp_name */
sizeof(itemgetterobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)itemgetter_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
(ternaryfunc)itemgetter_call, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
itemgetter_doc, /* tp_doc */
(traverseproc)itemgetter_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
itemgetter_new, /* tp_new */
0, /* tp_free */
};
/* attrgetter object **********************************************************/
/* Instance layout of operator.attrgetter. */
typedef struct {
PyObject_HEAD
/* Number of positional arguments given to the constructor. */
Py_ssize_t nattrs;
/* Owned reference: the single attribute name when nattrs == 1,
   otherwise the constructor's whole argument tuple of names. */
PyObject *attr;
} attrgetterobject;
/* Forward declaration; the constructor needs the type object, which is
   defined after the methods it refers to. */
static PyTypeObject attrgetter_type;
/* tp_new for operator.attrgetter.
 *
 * Accepts one or more positional attribute names and no keyword
 * arguments.  With a single name the name itself is stored; with
 * several, the argument tuple is stored as-is.  Names are not validated
 * here.  Returns a new GC-tracked object or NULL on error.
 */
static PyObject *
attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    attrgetterobject *getter;
    PyObject *names;
    Py_ssize_t count;

    if (!_PyArg_NoKeywords("attrgetter()", kwds))
        return NULL;

    count = PyTuple_GET_SIZE(args);
    if (count > 1) {
        /* Several names: keep the whole tuple. */
        names = args;
    }
    else if (!PyArg_UnpackTuple(args, "attrgetter", 1, 1, &names)) {
        /* Zero arguments: UnpackTuple has raised the TypeError. */
        return NULL;
    }

    /* create attrgetterobject structure */
    getter = PyObject_GC_New(attrgetterobject, &attrgetter_type);
    if (getter == NULL)
        return NULL;

    getter->nattrs = count;
    getter->attr = names;
    Py_INCREF(names);

    PyObject_GC_Track(getter);
    return (PyObject *)getter;
}
/* tp_dealloc for operator.attrgetter: stop GC tracking first, then drop
   the owned name reference and free the object's memory. */
static void
attrgetter_dealloc(attrgetterobject *ag)
{
PyObject_GC_UnTrack(ag);
Py_XDECREF(ag->attr);
PyObject_GC_Del(ag);
}
/* tp_traverse for operator.attrgetter: reports the single owned
   reference (ag->attr) to the cyclic garbage collector. */
static int
attrgetter_traverse(attrgetterobject *ag, visitproc visit, void *arg)
{
Py_VISIT(ag->attr);
return 0;
}
/* Resolve a possibly dotted attribute path ("a.b.c") starting at obj.
 *
 * attr must be a str (or, when unicode support is compiled in, a unicode
 * object, which is first converted with the default encoding).  Each
 * '.'-separated component is looked up with PyObject_GetAttr on the
 * result of the previous lookup.
 *
 * Returns a new reference to the final attribute, or NULL with an
 * exception set.  obj itself is only borrowed.
 */
static PyObject *
dotted_getattr(PyObject *obj, PyObject *attr)
{
    char *segment, *dot;
    PyObject *component, *next;

#ifdef Py_USING_UNICODE
    if (PyUnicode_Check(attr)) {
        attr = _PyUnicode_AsDefaultEncodedString(attr, NULL);
        if (attr == NULL)
            return NULL;
    }
#endif

    if (!PyString_Check(attr)) {
        PyErr_SetString(PyExc_TypeError,
                        "attribute name must be a string");
        return NULL;
    }

    /* The loop always owns one reference to the current object, so the
     * caller's object gets an extra reference up front. */
    Py_INCREF(obj);
    segment = PyString_AS_STRING(attr);

    do {
        dot = strchr(segment, '.');
        if (dot != NULL)
            component = PyString_FromStringAndSize(segment, (dot-segment));
        else
            component = PyString_FromString(segment);
        if (component == NULL) {
            Py_DECREF(obj);
            return NULL;
        }

        next = PyObject_GetAttr(obj, component);
        Py_DECREF(component);
        Py_DECREF(obj);
        if (next == NULL)
            return NULL;
        obj = next;

        /* Step past the separator when another component follows. */
        if (dot != NULL)
            segment = dot+1;
    } while (dot != NULL);

    return obj;
}
/* tp_call for operator.attrgetter.
 *
 * Takes exactly one positional operand.  Returns the (possibly dotted)
 * attribute for a single-name getter, otherwise a new tuple with one
 * resolved attribute per stored name, in order.  Returns NULL with an
 * exception set on failure.  The kw argument is not inspected.
 */
static PyObject *
attrgetter_call(attrgetterobject *ag, PyObject *args, PyObject *kw)
{
    PyObject *obj, *values;
    Py_ssize_t nattrs = ag->nattrs;
    Py_ssize_t pos;

    if (!PyArg_UnpackTuple(args, "attrgetter", 1, 1, &obj))
        return NULL;

    /* Single name: no tuple wrapping. */
    if (ag->nattrs == 1)
        return dotted_getattr(obj, ag->attr);

    assert(PyTuple_Check(ag->attr));
    assert(PyTuple_GET_SIZE(ag->attr) == nattrs);

    values = PyTuple_New(nattrs);
    if (values == NULL)
        return NULL;

    for (pos = 0; pos < nattrs; pos++) {
        PyObject *resolved = dotted_getattr(obj,
                                            PyTuple_GET_ITEM(ag->attr, pos));
        if (resolved == NULL) {
            /* Releases the partially filled tuple and its contents. */
            Py_DECREF(values);
            return NULL;
        }
        /* SET_ITEM takes over the new reference. */
        PyTuple_SET_ITEM(values, pos, resolved);
    }
    return values;
}
/* Docstring installed as operator.attrgetter.__doc__ via tp_doc below. */
PyDoc_STRVAR(attrgetter_doc,
"attrgetter(attr, ...) --> attrgetter object\n\
\n\
Return a callable object that fetches the given attribute(s) from its operand.\n\
After f = attrgetter('name'), the call f(r) returns r.name.\n\
After g = attrgetter('name', 'date'), the call g(r) returns (r.name, r.date).\n\
After h = attrgetter('name.first', 'name.last'), the call h(r) returns\n\
(r.name.first, r.name.last).");
/* Type object for operator.attrgetter.  Slots are positional: the type
   is GC-enabled, callable (tp_call), constructed through tp_new, and
   leaves every other optional slot empty. */
static PyTypeObject attrgetter_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"operator.attrgetter", /* tp_name */
sizeof(attrgetterobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)attrgetter_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
(ternaryfunc)attrgetter_call, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
attrgetter_doc, /* tp_doc */
(traverseproc)attrgetter_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
attrgetter_new, /* tp_new */
0, /* tp_free */
};
/* methodcaller object **********************************************************/
/* Instance layout of operator.methodcaller.  All three members are
   owned references released by the deallocator. */
typedef struct {
PyObject_HEAD
/* First constructor argument: the method name looked up on the operand. */
PyObject *name;
/* Tuple of the remaining positional constructor arguments. */
PyObject *args;
/* Keyword-argument dict passed to the constructor; may be NULL. */
PyObject *kwds;
} methodcallerobject;
/* Forward declaration; the constructor needs the type object, which is
   defined after the methods it refers to. */
static PyTypeObject methodcaller_type;
/* tp_new for operator.methodcaller.
 *
 * args[0] is the method name; args[1:] and kwds are stored and later
 * forwarded to the method.  Returns a new GC-tracked object, or NULL
 * with an exception set (TypeError when no name is given, or whatever
 * the allocations raised).
 */
static PyObject *
methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    methodcallerobject *mc;
    PyObject *name, *newargs;

    if (PyTuple_GET_SIZE(args) < 1) {
        PyErr_SetString(PyExc_TypeError, "methodcaller needs at least "
                        "one argument, the method name");
        return NULL;
    }

    /* Build the forwarded-arguments tuple BEFORE allocating the object.
     * PyObject_GC_New() does not zero the new memory, so releasing a
     * half-built object on a later failure would make the deallocator
     * Py_XDECREF uninitialised name/args/kwds pointers. */
    newargs = PyTuple_GetSlice(args, 1, PyTuple_GET_SIZE(args));
    if (newargs == NULL)
        return NULL;

    /* create methodcallerobject structure */
    mc = PyObject_GC_New(methodcallerobject, &methodcaller_type);
    if (mc == NULL) {
        Py_DECREF(newargs);
        return NULL;
    }

    /* Nothing below can fail, so every field is initialised before the
     * object becomes visible to the collector. */
    mc->args = newargs;

    name = PyTuple_GET_ITEM(args, 0);
    Py_INCREF(name);
    mc->name = name;

    Py_XINCREF(kwds);
    mc->kwds = kwds;

    PyObject_GC_Track(mc);
    return (PyObject *)mc;
}
/* tp_dealloc for operator.methodcaller: stop GC tracking first, then
   drop the three owned references (each may be NULL) and free the
   object's memory. */
static void
methodcaller_dealloc(methodcallerobject *mc)
{
PyObject_GC_UnTrack(mc);
Py_XDECREF(mc->name);
Py_XDECREF(mc->args);
Py_XDECREF(mc->kwds);
PyObject_GC_Del(mc);
}
/* tp_traverse for operator.methodcaller: reports every owned reference
 * to the cyclic garbage collector.
 *
 * mc->name must be visited as well: the constructor stores whatever
 * object it is given as the name without checking its type, so a
 * reference cycle can run through it.  Py_VISIT skips NULL members.
 */
static int
methodcaller_traverse(methodcallerobject *mc, visitproc visit, void *arg)
{
    Py_VISIT(mc->name);
    Py_VISIT(mc->args);
    Py_VISIT(mc->kwds);
    return 0;
}
/* tp_call for operator.methodcaller.
 *
 * Takes exactly one positional operand, looks up the stored method name
 * on it and calls the result with the stored positional and keyword
 * arguments.  Returns the call's result (new reference) or NULL with an
 * exception set.  The kw argument is not inspected.
 */
static PyObject *
methodcaller_call(methodcallerobject *mc, PyObject *args, PyObject *kw)
{
    PyObject *target, *bound, *outcome;

    if (!PyArg_UnpackTuple(args, "methodcaller", 1, 1, &target))
        return NULL;

    bound = PyObject_GetAttr(target, mc->name);
    if (bound == NULL)
        return NULL;

    outcome = PyObject_Call(bound, mc->args, mc->kwds);
    Py_DECREF(bound);
    return outcome;
}
/* Docstring installed as operator.methodcaller.__doc__ via tp_doc below. */
PyDoc_STRVAR(methodcaller_doc,
"methodcaller(name, ...) --> methodcaller object\n\
\n\
Return a callable object that calls the given method on its operand.\n\
After f = methodcaller('name'), the call f(r) returns r.name().\n\
After g = methodcaller('name', 'date', foo=1), the call g(r) returns\n\
r.name('date', foo=1).");
/* Type object for operator.methodcaller.  Slots are positional: the
   type is GC-enabled, callable (tp_call), constructed through tp_new,
   and leaves every other optional slot empty. */
static PyTypeObject methodcaller_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"operator.methodcaller", /* tp_name */
sizeof(methodcallerobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)methodcaller_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
(ternaryfunc)methodcaller_call, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
methodcaller_doc, /* tp_doc */
(traverseproc)methodcaller_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
methodcaller_new, /* tp_new */
0, /* tp_free */
};
/* Initialization function for the module (*must* be called initoperator) */
PyMODINIT_FUNC
initoperator(void)
{
PyObject *m;
/* Create the module and add the functions */
m = Py_InitModule4("operator", operator_methods, operator_doc,
(PyObject*)NULL, PYTHON_API_VERSION);
if (m == NULL)
return;
if (PyType_Ready(&itemgetter_type) < 0)
return;
Py_INCREF(&itemgetter_type);
PyModule_AddObject(m, "itemgetter", (PyObject *)&itemgetter_type);
if (PyType_Ready(&attrgetter_type) < 0)
return;
Py_INCREF(&attrgetter_type);
PyModule_AddObject(m, "attrgetter", (PyObject *)&attrgetter_type);
if (PyType_Ready(&methodcaller_type) < 0)
return;
Py_INCREF(&methodcaller_type);
PyModule_AddObject(m, "methodcaller", (PyObject *)&methodcaller_type);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,24 @@
/* Minimal main program -- everything is loaded from the library */
#include "Python.h"
#ifdef __FreeBSD__
#include <floatingpoint.h>
#endif
/* Program entry point: all real work is delegated to Py_Main(). */
int
main(int argc, char **argv)
{
#ifdef __FreeBSD__
    /* 754 requires that FP exceptions run in "no stop" mode by default,
     * and until C vendors implement C99's ways to control FP exceptions,
     * Python requires non-stop mode.  Alas, some platforms enable FP
     * exceptions by default.  Here we disable them: clear the overflow
     * bit in the current mask and leave the other bits untouched.
     */
    fpsetmask(fpgetmask() & ~FP_X_OFL);
#endif
    return Py_Main(argc, argv);
}

View File

@ -0,0 +1,706 @@
/* SHA256 module */
/* This module provides an interface to NIST's SHA-256 and SHA-224 Algorithms */
/* See below for information about the original code this module was
based upon. Additional work performed by:
Andrew Kuchling (amk@amk.ca)
Greg Stein (gstein@lyra.org)
Trevor Perrin (trevp@trevp.net)
Copyright (C) 2005 Gregory P. Smith (greg@krypto.org)
Licensed to PSF under a Contributor Agreement.
*/
/* SHA objects */
#include "Python.h"
#include "structmember.h"
/* Endianness testing and definitions */
/* TestEndianness(variable) stores PCT_LITTLE_ENDIAN in `variable` when
   the first byte of an int holding 1 is 1, and PCT_BIG_ENDIAN otherwise.
   The expansion is a complete braced block, so call sites in this file
   write it without a trailing semicolon. */
#define TestEndianness(variable) {int i=1; variable=PCT_BIG_ENDIAN;\
if (*((char*)&i)==1) variable=PCT_LITTLE_ENDIAN;}
#define PCT_LITTLE_ENDIAN 1
#define PCT_BIG_ENDIAN 0
/* Some useful types */
typedef unsigned char SHA_BYTE; /* one byte of message or digest data */
#if SIZEOF_INT == 4
typedef unsigned int SHA_INT32; /* 32-bit integer */
#else
/* not defined. compilation will die. */
/* (SHA_INT32 is deliberately left undeclared here, so every later use
   of it fails to compile on platforms where int is not 4 bytes.) */
#endif
/* The SHA block size and message digest sizes, in bytes */
#define SHA_BLOCKSIZE 64
/* Size of the full 8-word state written by sha_final(); objects created
   by sha224_init() report a digestsize of 28 instead. */
#define SHA_DIGESTSIZE 32
/* The structure for storing SHA info */
/* Shared by the sha224 and sha256 types; the two differ only in their
   initial digest words and in digestsize. */
typedef struct {
PyObject_HEAD
SHA_INT32 digest[8]; /* Message digest */
SHA_INT32 count_lo, count_hi; /* 64-bit bit count */
SHA_BYTE data[SHA_BLOCKSIZE]; /* SHA data buffer */
int Endianness; /* PCT_LITTLE_ENDIAN or PCT_BIG_ENDIAN, set at init time */
int local; /* unprocessed amount in data */
int digestsize; /* bytes of digest exposed to callers: 32 or 28 */
} SHAobject;
/* Byte-swap an array of 32-bit words in place.
 *
 * Needed on little-endian CPUs, where the words of a block have to be
 * reversed before they are processed; on big-endian hosts the function
 * does nothing.
 *
 *   buffer      first word to convert
 *   byteCount   size of the array in bytes (converted to a word count)
 *   Endianness  PCT_BIG_ENDIAN or PCT_LITTLE_ENDIAN
 */
static void longReverse(SHA_INT32 *buffer, int byteCount, int Endianness)
{
    int words;

    if (Endianness != PCT_BIG_ENDIAN) {
        for (words = byteCount / sizeof(*buffer); words != 0;
             --words, ++buffer) {
            SHA_INT32 swapped = *buffer;

            /* Swap the bytes inside each 16-bit half ... */
            swapped = ( ( swapped & 0xFF00FF00L ) >> 8 ) |
                      ( ( swapped & 0x00FF00FFL ) << 8 );
            /* ... then swap the two halves. */
            *buffer = ( swapped << 16 ) | ( swapped >> 16 );
        }
    }
}
/* Copy the complete hashing state from src into dest.
 *
 * Only the SHA-specific members are copied; the Python object header of
 * dest is left untouched, so dest may be either a real object or a
 * scratch structure on the stack.
 */
static void SHAcopy(SHAobject *src, SHAobject *dest)
{
    /* Array members first ... */
    memcpy(dest->data, src->data, sizeof(src->data));
    memcpy(dest->digest, src->digest, sizeof(src->digest));

    /* ... then the scalar bookkeeping. */
    dest->count_hi = src->count_hi;
    dest->count_lo = src->count_lo;
    dest->digestsize = src->digestsize;
    dest->local = src->local;
    dest->Endianness = src->Endianness;
}
/* ------------------------------------------------------------------------
*
* This code for the SHA-256 algorithm was noted as public domain. The
* original headers are pasted below.
*
* Several changes have been made to make it more compatible with the
* Python environment and desired interface.
*
*/
/* LibTomCrypt, modular cryptographic library -- Tom St Denis
*
* LibTomCrypt is a library that provides various cryptographic
* algorithms in a highly modular and flexible manner.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://libtomcrypt.org
*/
/* SHA256 by Tom St Denis */
/* Various logical functions */
/* ROR: rotate the low 32 bits of x right by (y mod 32) positions.  The
   arithmetic is done in unsigned long and masked back to 32 bits, so
   the result is the same whether long is 32 or 64 bits wide. */
#define ROR(x, y)\
( ((((unsigned long)(x)&0xFFFFFFFFUL)>>(unsigned long)((y)&31)) | \
((unsigned long)(x)<<(unsigned long)(32-((y)&31)))) & 0xFFFFFFFFUL)
/* Ch: for each bit, pick y where x is 1 and z where x is 0. */
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
/* Maj: for each bit, the majority value among x, y and z. */
#define Maj(x,y,z) (((x | y) & z) | (x & y))
/* S: 32-bit rotate right; R: 32-bit logical shift right. */
#define S(x, n) ROR((x),(n))
#define R(x, n) (((x)&0xFFFFFFFFUL)>>(n))
/* Sigma0/Sigma1 are applied to the working state in each round;
   Gamma0/Gamma1 are applied while expanding the message schedule. */
#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
#define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
/* Process the 64-byte block currently held in sha_info->data and fold
   the result into sha_info->digest.

   Steps: copy the block into W[0..15] (byte-swapping the words on
   little-endian hosts), expand the schedule to W[16..63], run 64 rounds
   on a working copy S of the digest, then add S back into the digest.
   sha_info->data itself is not modified. */
static void
sha_transform(SHAobject *sha_info)
{
int i;
/* The array S does not clash with the function-like macro S(x, n): the
   macro is only expanded where the name is followed by '('. */
SHA_INT32 S[8], W[64], t0, t1;
/* Only the first 16 words of W are filled from the block. */
memcpy(W, sha_info->data, sizeof(sha_info->data));
longReverse(W, (int)sizeof(sha_info->data), sha_info->Endianness);
/* Message schedule expansion. */
for (i = 16; i < 64; ++i) {
W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
}
/* Working copy of the chaining state. */
for (i = 0; i < 8; ++i) {
S[i] = sha_info->digest[i];
}
/* Compress */
/* One round: i selects the schedule word, ki is the round constant.
   Instead of shifting the eight state words after every round, each
   invocation below passes them rotated by one position. */
#define RND(a,b,c,d,e,f,g,h,i,ki) \
t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i]; \
t1 = Sigma0(a) + Maj(a, b, c); \
d += t0; \
h = t0 + t1;
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,0x428a2f98);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],1,0x71374491);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],2,0xb5c0fbcf);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],3,0xe9b5dba5);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],4,0x3956c25b);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],5,0x59f111f1);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],6,0x923f82a4);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],7,0xab1c5ed5);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],8,0xd807aa98);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],9,0x12835b01);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],10,0x243185be);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],11,0x550c7dc3);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],12,0x72be5d74);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],13,0x80deb1fe);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],14,0x9bdc06a7);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],15,0xc19bf174);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c1);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc6);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dc);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c8);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf3);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x14292967);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a85);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b2138);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d13);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a7354);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c85);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a1);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664b);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a3);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd6990624);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e3585);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa070);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c08);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774c);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4a);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc70208);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506ceb);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2);
#undef RND
/* feedback */
for (i = 0; i < 8; i++) {
sha_info->digest[i] = sha_info->digest[i] + S[i];
}
}
/* Reset sha_info to the SHA-256 starting state: host endianness
 * detected, the eight initial digest words loaded, bit counters and
 * pending-byte count cleared, and a 32-byte digest size selected. */
static void
sha_init(SHAobject *sha_info)
{
    static const SHA_INT32 initial_state[8] = {
        0x6A09E667L, 0xBB67AE85L, 0x3C6EF372L, 0xA54FF53AL,
        0x510E527FL, 0x9B05688CL, 0x1F83D9ABL, 0x5BE0CD19L
    };
    int word;

    /* The macro expands to a complete block; no semicolon needed. */
    TestEndianness(sha_info->Endianness)

    for (word = 0; word < 8; word++)
        sha_info->digest[word] = initial_state[word];

    sha_info->count_lo = 0L;
    sha_info->count_hi = 0L;
    sha_info->local = 0;
    sha_info->digestsize = 32;
}
/* Reset sha_info to the SHA-224 starting state: host endianness
 * detected, the eight initial digest words loaded, bit counters and
 * pending-byte count cleared, and a 28-byte digest size selected. */
static void
sha224_init(SHAobject *sha_info)
{
    static const SHA_INT32 initial_state[8] = {
        0xc1059ed8L, 0x367cd507L, 0x3070dd17L, 0xf70e5939L,
        0xffc00b31L, 0x68581511L, 0x64f98fa7L, 0xbefa4fa4L
    };
    int word;

    /* The macro expands to a complete block; no semicolon needed. */
    TestEndianness(sha_info->Endianness)

    for (word = 0; word < 8; word++)
        sha_info->digest[word] = initial_state[word];

    sha_info->count_lo = 0L;
    sha_info->count_hi = 0L;
    sha_info->local = 0;
    sha_info->digestsize = 28;
}
/* Feed `count` bytes starting at `buffer` into the running hash.
 *
 * The 64-bit message bit count is advanced, any partially filled block
 * left over from an earlier call is completed first, every full block
 * is transformed, and the trailing bytes are kept in sha_info->data
 * with their number recorded in sha_info->local.
 */
static void
sha_update(SHAobject *sha_info, SHA_BYTE *buffer, int count)
{
    SHA_INT32 new_lo;
    int room;

    /* Add count * 8 bits to the counter, carrying into the high word. */
    new_lo = sha_info->count_lo + ((SHA_INT32) count << 3);
    if (new_lo < sha_info->count_lo)
        sha_info->count_hi++;
    sha_info->count_lo = new_lo;
    sha_info->count_hi += (SHA_INT32) count >> 29;

    /* Top up a block that an earlier call left partially filled. */
    if (sha_info->local != 0) {
        room = SHA_BLOCKSIZE - sha_info->local;
        if (count < room)
            room = count;
        memcpy(((SHA_BYTE *) sha_info->data) + sha_info->local, buffer, room);
        buffer += room;
        count -= room;
        sha_info->local += room;
        if (sha_info->local != SHA_BLOCKSIZE) {
            /* Still not a full block: nothing more to do. */
            return;
        }
        sha_transform(sha_info);
    }

    /* Whole blocks straight from the input. */
    for (; count >= SHA_BLOCKSIZE;
         buffer += SHA_BLOCKSIZE, count -= SHA_BLOCKSIZE) {
        memcpy(sha_info->data, buffer, SHA_BLOCKSIZE);
        sha_transform(sha_info);
    }

    /* Stash the remainder for the next call. */
    memcpy(sha_info->data, buffer, count);
    sha_info->local = count;
}
/* Finish the hash held in sha_info and write all SHA_DIGESTSIZE bytes
 * of the result to `digest`, most significant byte of each state word
 * first.
 *
 * The function pads the pending block (a 0x80 marker, zero fill, then
 * the 64-bit bit count) and runs the final transform(s), so sha_info is
 * modified; callers that need to keep hashing pass a copy.
 */
static void
sha_final(unsigned char digest[SHA_DIGESTSIZE], SHAobject *sha_info)
{
    SHA_INT32 bits_lo = sha_info->count_lo;
    SHA_INT32 bits_hi = sha_info->count_hi;
    SHA_BYTE *block = (SHA_BYTE *) sha_info->data;
    int used, k, word;

    /* Bytes already pending in the block, then the padding marker. */
    used = (int) ((bits_lo >> 3) & 0x3f);
    block[used++] = 0x80;

    if (used > SHA_BLOCKSIZE - 8) {
        /* No room left for the length: flush this block and start a
           fresh, zeroed one. */
        memset(block + used, 0, SHA_BLOCKSIZE - used);
        sha_transform(sha_info);
        memset(block, 0, SHA_BLOCKSIZE - 8);
    }
    else {
        memset(block + used, 0, SHA_BLOCKSIZE - 8 - used);
    }

    /* GJS: note that we add the hi/lo in big-endian. sha_transform will
       swap these values into host-order. */
    for (k = 0; k < 4; k++) {
        int shift = 24 - 8 * k;
        sha_info->data[56 + k] = (bits_hi >> shift) & 0xff;
        sha_info->data[60 + k] = (bits_lo >> shift) & 0xff;
    }
    sha_transform(sha_info);

    /* Serialise the eight state words, four bytes each. */
    for (word = 0; word < 8; word++) {
        SHA_INT32 value = sha_info->digest[word];

        digest[4 * word + 0] = (unsigned char) ((value >> 24) & 0xff);
        digest[4 * word + 1] = (unsigned char) ((value >> 16) & 0xff);
        digest[4 * word + 2] = (unsigned char) ((value >> 8) & 0xff);
        digest[4 * word + 3] = (unsigned char) ((value ) & 0xff);
    }
}
/*
* End of copied SHA code.
*
* ------------------------------------------------------------------------
*/
/* Forward declarations: the object constructors and SHA256_copy() refer
   to the type objects, which are defined after the method tables. */
static PyTypeObject SHA224type;
static PyTypeObject SHA256type;
/* Allocate a SHAobject of type SHA224type.  The hash state is NOT
   initialised here.  Returns whatever PyObject_New() returns, so NULL
   on allocation failure. */
static SHAobject *
newSHA224object(void)
{
return (SHAobject *)PyObject_New(SHAobject, &SHA224type);
}
/* Allocate a SHAobject of type SHA256type.  The hash state is NOT
   initialised here.  Returns whatever PyObject_New() returns, so NULL
   on allocation failure. */
static SHAobject *
newSHA256object(void)
{
return (SHAobject *)PyObject_New(SHAobject, &SHA256type);
}
/* Internal methods for a hash object */
/* tp_dealloc shared by both hash types.  A SHAobject holds no references
   to other Python objects, so freeing the memory is all that is needed. */
static void
SHA_dealloc(PyObject *ptr)
{
PyObject_Del(ptr);
}
/* External methods for a hash object */
PyDoc_STRVAR(SHA256_copy__doc__, "Return a copy of the hash object.");

/* copy(): allocate a new object of the same type as self (sha256 or
 * sha224) and duplicate the current hashing state into it.  Returns the
 * new object, or NULL if the allocation failed. */
static PyObject *
SHA256_copy(SHAobject *self, PyObject *unused)
{
    SHAobject *clone;

    clone = (Py_TYPE(self) == &SHA256type) ? newSHA256object()
                                           : newSHA224object();
    if (clone == NULL)
        return NULL;

    SHAcopy(self, clone);
    return (PyObject *)clone;
}
PyDoc_STRVAR(SHA256_digest__doc__,
"Return the digest value as a string of binary data.");

/* digest(): finalise a scratch copy of the state, so self can keep
 * being updated afterwards, and return the first self->digestsize bytes
 * of the result as a new string. */
static PyObject *
SHA256_digest(SHAobject *self, PyObject *unused)
{
    SHAobject snapshot;
    unsigned char raw[SHA_DIGESTSIZE];

    SHAcopy(self, &snapshot);
    sha_final(raw, &snapshot);
    return PyString_FromStringAndSize((const char *)raw, self->digestsize);
}
PyDoc_STRVAR(SHA256_hexdigest__doc__,
"Return the digest value as a string of hexadecimal digits.");

/* hexdigest(): finalise a scratch copy of the state, so self can keep
 * being updated afterwards, and return the first self->digestsize bytes
 * of the result as a new string of lowercase hexadecimal digits, two
 * per byte.  Returns NULL with an exception set on failure. */
static PyObject *
SHA256_hexdigest(SHAobject *self, PyObject *unused)
{
    static const char hexdigits[] = "0123456789abcdef";
    unsigned char raw[SHA_DIGESTSIZE];
    SHAobject snapshot;
    PyObject *retval;
    char *out;
    int pos;

    /* Get the raw (binary) digest value */
    SHAcopy(self, &snapshot);
    sha_final(raw, &snapshot);

    /* Create a new string */
    retval = PyString_FromStringAndSize(NULL, self->digestsize * 2);
    if (retval == NULL)
        return NULL;
    out = PyString_AsString(retval);
    if (out == NULL) {
        Py_DECREF(retval);
        return NULL;
    }

    /* Make hex version of the digest: high nibble first, then low. */
    for (pos = 0; pos < self->digestsize; pos++) {
        out[2 * pos] = hexdigits[(raw[pos] >> 4) & 0xf];
        out[2 * pos + 1] = hexdigits[raw[pos] & 0xf];
    }
    return retval;
}
PyDoc_STRVAR(SHA256_update__doc__,
"Update this hash object's state with the provided string.");

/* update(data): hash the contents of a string or buffer object.
 *
 * Returns None, or NULL with an exception set if the argument cannot be
 * parsed.
 *
 * The buffer length is a Py_ssize_t while sha_update() takes an int
 * byte count, so the data is fed in pieces of at most INT_MAX bytes.
 * Passing the length straight through would silently truncate it (or
 * turn it negative) for inputs larger than INT_MAX on platforms where
 * Py_ssize_t is wider than int.  An empty buffer needs no call at all:
 * sha_update() with a count of zero leaves the state unchanged.
 */
static PyObject *
SHA256_update(SHAobject *self, PyObject *args)
{
    Py_buffer buf;
    SHA_BYTE *cursor;
    Py_ssize_t remaining;

    if (!PyArg_ParseTuple(args, "s*:update", &buf))
        return NULL;

    cursor = (SHA_BYTE *)buf.buf;
    remaining = buf.len;
    while (remaining > 0) {
        /* INT_MAX comes from <limits.h>, which Python.h includes. */
        int chunk = (remaining > INT_MAX) ? INT_MAX : (int)remaining;

        sha_update(self, cursor, chunk);
        cursor += chunk;
        remaining -= chunk;
    }

    PyBuffer_Release(&buf);
    Py_RETURN_NONE;
}
/* Method table shared by the sha224 and sha256 types (both type objects
   reference it through tp_methods). */
static PyMethodDef SHA_methods[] = {
{"copy", (PyCFunction)SHA256_copy, METH_NOARGS, SHA256_copy__doc__},
{"digest", (PyCFunction)SHA256_digest, METH_NOARGS, SHA256_digest__doc__},
{"hexdigest", (PyCFunction)SHA256_hexdigest, METH_NOARGS, SHA256_hexdigest__doc__},
{"update", (PyCFunction)SHA256_update, METH_VARARGS, SHA256_update__doc__},
{NULL, NULL} /* sentinel */
};
/* Getter for the block_size attribute: always SHA_BLOCKSIZE, returned
   as a Python int, regardless of which of the two types self is. */
static PyObject *
SHA256_get_block_size(PyObject *self, void *closure)
{
return PyInt_FromLong(SHA_BLOCKSIZE);
}
/* Getter for the name attribute: "SHA256" when the object produces a
 * 32-byte digest, "SHA224" otherwise.  Returns a new string. */
static PyObject *
SHA256_get_name(PyObject *self, void *closure)
{
    const char *label;

    label = (((SHAobject *)self)->digestsize == 32) ? "SHA256" : "SHA224";
    return PyString_FromStringAndSize(label, 6);
}
/* Computed attributes shared by both hash types.  Both entries have a
   getter only (setter, doc and closure are NULL). */
static PyGetSetDef SHA_getseters[] = {
{"block_size",
(getter)SHA256_get_block_size, NULL,
NULL,
NULL},
{"name",
(getter)SHA256_get_name, NULL,
NULL,
NULL},
{NULL} /* Sentinel */
};
/* Read-only attributes mapped directly onto SHAobject fields.  Both
   names expose the same digestsize field. */
static PyMemberDef SHA_members[] = {
{"digest_size", T_INT, offsetof(SHAobject, digestsize), READONLY, NULL},
/* the old md5 and sha modules support 'digest_size' as in PEP 247.
* the old sha module also supported 'digestsize'. ugh. */
{"digestsize", T_INT, offsetof(SHAobject, digestsize), READONLY, NULL},
{NULL} /* Sentinel */
};
/* Type object for SHA-224 hash objects.
 *
 * The initializer is positional, so every slot up to tp_getset must stay in
 * PyTypeObject declaration order; slots after tp_getset are left to C's
 * implicit zero-initialization.  The type pointer is passed as NULL here and
 * assigned in init_sha256() before PyType_Ready() is called. */
static PyTypeObject SHA224type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_sha256.sha224", /*tp_name*/
sizeof(SHAobject), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
SHA_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT, /*tp_flags*/
0, /*tp_doc*/
0, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
SHA_methods, /* tp_methods */
SHA_members, /* tp_members */
SHA_getseters, /* tp_getset */
};
/* Type object for SHA-256 hash objects.
 *
 * Identical to SHA224type apart from tp_name: both types share the same
 * instance struct, deallocator, and method/member/getset tables.  The
 * initializer is positional, so the slot order must not be changed; slots
 * after tp_getset are implicitly zero.  The type pointer is passed as NULL
 * here and assigned in init_sha256() before PyType_Ready() is called. */
static PyTypeObject SHA256type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_sha256.sha256", /*tp_name*/
sizeof(SHAobject), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
SHA_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT, /*tp_flags*/
0, /*tp_doc*/
0, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
SHA_methods, /* tp_methods */
SHA_members, /* tp_members */
SHA_getseters, /* tp_getset */
};
/* The single module-level function: new() */
PyDoc_STRVAR(SHA256_new__doc__,
"Return a new SHA-256 hash object; optionally initialized with a string.");

/* Module-level constructor, exported as "sha256".
 *
 * Accepts one optional argument, "string" (positional or keyword), parsed
 * with the "s*" buffer format.  A fresh object is allocated, initialised
 * with sha_init(), and, when the supplied buffer is non-empty, fed that
 * data.  Returns the new object, or NULL on failure.  The parsed buffer is
 * released on every path that follows a successful parse. */
static PyObject *
SHA256_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
    static char *kwlist[] = {"string", NULL};
    SHAobject *hasher;
    Py_buffer data = { 0 };

    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s*:new", kwlist,
                                     &data)) {
        return NULL;
    }

    hasher = newSHA256object();
    if (hasher != NULL) {
        sha_init(hasher);
        if (PyErr_Occurred()) {
            /* Drop the half-built object and report the pending error. */
            Py_DECREF(hasher);
            hasher = NULL;
        }
        else if (data.len > 0) {
            sha_update(hasher, data.buf, data.len);
        }
    }

    PyBuffer_Release(&data);
    return (PyObject *)hasher;
}
PyDoc_STRVAR(SHA224_new__doc__,
"Return a new SHA-224 hash object; optionally initialized with a string.");

/* Module-level constructor, exported as "sha224".
 *
 * Mirrors SHA256_new but allocates through newSHA224object() and
 * initialises with sha224_init().  Returns the new object, or NULL on
 * failure.  The parsed buffer is released on every path that follows a
 * successful parse. */
static PyObject *
SHA224_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
    static char *kwlist[] = {"string", NULL};
    SHAobject *hasher;
    Py_buffer data = { 0 };

    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s*:new", kwlist,
                                     &data)) {
        return NULL;
    }

    hasher = newSHA224object();
    if (hasher != NULL) {
        sha224_init(hasher);
        if (PyErr_Occurred()) {
            /* Drop the half-built object and report the pending error. */
            Py_DECREF(hasher);
            hasher = NULL;
        }
        else if (data.len > 0) {
            sha_update(hasher, data.buf, data.len);
        }
    }

    PyBuffer_Release(&data);
    return (PyObject *)hasher;
}
/* List of functions exported by this module */
/* Both constructors accept positional and keyword arguments, hence
 * METH_VARARGS|METH_KEYWORDS.  The table is handed to Py_InitModule() in
 * init_sha256(). */
static struct PyMethodDef SHA_functions[] = {
{"sha256", (PyCFunction)SHA256_new, METH_VARARGS|METH_KEYWORDS, SHA256_new__doc__},
{"sha224", (PyCFunction)SHA224_new, METH_VARARGS|METH_KEYWORDS, SHA224_new__doc__},
{NULL, NULL} /* Sentinel */
};
/* Initialize this module. */
#define insint(n,v) { PyModule_AddIntConstant(m,n,v); }
PyMODINIT_FUNC
init_sha256(void)
{
PyObject *m;
Py_TYPE(&SHA224type) = &PyType_Type;
if (PyType_Ready(&SHA224type) < 0)
return;
Py_TYPE(&SHA256type) = &PyType_Type;
if (PyType_Ready(&SHA256type) < 0)
return;
m = Py_InitModule("_sha256", SHA_functions);
if (m == NULL)
return;
}

View File

@ -0,0 +1,774 @@
/* SHA512 module */
/* This module provides an interface to NIST's SHA-512 and SHA-384 Algorithms */
/* See below for information about the original code this module was
based upon. Additional work performed by:
Andrew Kuchling (amk@amk.ca)
Greg Stein (gstein@lyra.org)
Trevor Perrin (trevp@trevp.net)
Copyright (C) 2005 Gregory P. Smith (greg@krypto.org)
Licensed to PSF under a Contributor Agreement.
*/
/* SHA objects */
#include "Python.h"
#include "structmember.h"
/* Everything from here to the matching #endif at the end of the module is
   compiled only when a 64-bit integer type (PY_LONG_LONG) is available. */
#ifdef PY_LONG_LONG /* If no PY_LONG_LONG, don't compile anything! */
/* Endianness testing and definitions */
/* TestEndianness(variable) is a run-time probe: it stores the int value 1,
   inspects its first byte in memory, and assigns PCT_LITTLE_ENDIAN to
   `variable` when that byte is 1, PCT_BIG_ENDIAN otherwise.  The expansion
   is a brace-enclosed block, so call sites need no trailing semicolon. */
#define TestEndianness(variable) {int i=1; variable=PCT_BIG_ENDIAN;\
if (*((char*)&i)==1) variable=PCT_LITTLE_ENDIAN;}
#define PCT_LITTLE_ENDIAN 1
#define PCT_BIG_ENDIAN 0
/* Some useful types */
typedef unsigned char SHA_BYTE;
/* The fixed-width typedefs exist only when int is exactly 4 bytes; on any
   other platform they are left undefined and later uses fail to compile. */
#if SIZEOF_INT == 4
typedef unsigned int SHA_INT32; /* 32-bit integer */
typedef unsigned PY_LONG_LONG SHA_INT64; /* 64-bit integer */
#else
/* not defined. compilation will die. */
#endif
/* The SHA block size and message digest sizes, in bytes */
#define SHA_BLOCKSIZE 128
#define SHA_DIGESTSIZE 64
/* The structure for storing SHA info */
/* One instance layout serves both SHA-512 and SHA-384 objects; the two are
   told apart by digestsize, which the init functions set to 64 or 48. */
typedef struct {
PyObject_HEAD
SHA_INT64 digest[8]; /* Message digest */
SHA_INT32 count_lo, count_hi; /* 64-bit bit count */
SHA_BYTE data[SHA_BLOCKSIZE]; /* SHA data buffer */
int Endianness; /* PCT_LITTLE_ENDIAN or PCT_BIG_ENDIAN, set by TestEndianness */
int local; /* unprocessed amount in data */
int digestsize; /* bytes of digest returned to callers */
} SHAobject;
/* When run on a little-endian CPU we need to perform byte reversal on an
   array of longwords. */

/* Rewrite each 64-bit word in `buffer` so that its bytes are stored most
 * significant first.
 *
 *   buffer     - words to convert in place
 *   byteCount  - size of the region in bytes (converted to a word count)
 *   Endianness - host byte order; nothing is done for PCT_BIG_ENDIAN
 */
static void longReverse(SHA_INT64 *buffer, int byteCount, int Endianness)
{
    if (Endianness == PCT_BIG_ENDIAN) {
        return;
    }

    byteCount /= sizeof(*buffer);
    for (; byteCount != 0; --byteCount, ++buffer) {
        const SHA_INT64 word = *buffer;
        unsigned char *out = (unsigned char *)buffer;
        int k;

        /* out[0] receives bits 63..56, out[7] receives bits 7..0. */
        for (k = 0; k < 8; ++k) {
            out[k] = (unsigned char)((word >> (56 - 8 * k)) & 0xff);
        }
    }
}
/* Copy the complete hashing state (chaining values, pending input, bit
 * counters, byte-order flag and digest size) from `src` into `dest`.  The
 * Python object header of `dest` is left untouched. */
static void SHAcopy(SHAobject *src, SHAobject *dest)
{
    /* Array members first ... */
    memcpy(dest->data, src->data, sizeof(src->data));
    memcpy(dest->digest, src->digest, sizeof(src->digest));

    /* ... then the scalar bookkeeping fields. */
    dest->count_hi = src->count_hi;
    dest->count_lo = src->count_lo;
    dest->digestsize = src->digestsize;
    dest->local = src->local;
    dest->Endianness = src->Endianness;
}
/* ------------------------------------------------------------------------
*
* This code for the SHA-512 algorithm was noted as public domain. The
* original headers are pasted below.
*
* Several changes have been made to make it more compatible with the
* Python environment and desired interface.
*
*/
/* LibTomCrypt, modular cryptographic library -- Tom St Denis
*
* LibTomCrypt is a library that provides various cryptographic
* algorithms in a highly modular and flexible manner.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://libtomcrypt.org
*/
/* SHA512 by Tom St Denis */
/* Various logical functions */
/* ROR64(x, y): rotate the 64-bit value x right by (y mod 64) bits; the
   masks keep the result within 64 bits. */
#define ROR64(x, y) \
( ((((x) & Py_ULL(0xFFFFFFFFFFFFFFFF))>>((unsigned PY_LONG_LONG)(y) & 63)) | \
((x)<<((unsigned PY_LONG_LONG)(64-((y) & 63))))) & Py_ULL(0xFFFFFFFFFFFFFFFF))
/* Ch selects bits from y where x is 1 and from z where x is 0;
   Maj yields the majority bit of x, y and z. */
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
#define Maj(x,y,z) (((x | y) & z) | (x & y))
/* S is a rotate right, R a plain logical shift right. */
#define S(x, n) ROR64((x),(n))
#define R(x, n) (((x) & Py_ULL(0xFFFFFFFFFFFFFFFF)) >> ((unsigned PY_LONG_LONG)n))
/* Sigma0/Sigma1 are used in the round function (RND) and Gamma0/Gamma1 in
   the message-schedule expansion of sha512_transform. */
#define Sigma0(x) (S(x, 28) ^ S(x, 34) ^ S(x, 39))
#define Sigma1(x) (S(x, 14) ^ S(x, 18) ^ S(x, 41))
#define Gamma0(x) (S(x, 1) ^ S(x, 8) ^ R(x, 7))
#define Gamma1(x) (S(x, 19) ^ S(x, 61) ^ R(x, 6))
/* Run the compression function over the single block held in
 * sha_info->data and fold the result into sha_info->digest.
 *
 * The 128 input bytes are copied into W[0..15], byte-swapped on
 * little-endian hosts, and expanded to 80 schedule words.  Eighty rounds
 * are then applied to a working copy S[] of the digest; instead of moving
 * values between variables, each RND invocation rotates which element of
 * S[] plays which role.  sha_info->data itself is not modified. */
static void
sha512_transform(SHAobject *sha_info)
{
int i;
SHA_INT64 S[8], W[80], t0, t1;
/* Load the block as sixteen 64-bit words in host order. */
memcpy(W, sha_info->data, sizeof(sha_info->data));
longReverse(W, (int)sizeof(sha_info->data), sha_info->Endianness);
/* Expand the sixteen input words to the full 80-word schedule. */
for (i = 16; i < 80; ++i) {
W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
}
/* Work on a copy so the chaining value survives until the feedback step. */
for (i = 0; i < 8; ++i) {
S[i] = sha_info->digest[i];
}
/* Compress */
#define RND(a,b,c,d,e,f,g,h,i,ki) \
t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i]; \
t1 = Sigma0(a) + Maj(a, b, c); \
d += t0; \
h = t0 + t1;
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,Py_ULL(0x428a2f98d728ae22));
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],1,Py_ULL(0x7137449123ef65cd));
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],2,Py_ULL(0xb5c0fbcfec4d3b2f));
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],3,Py_ULL(0xe9b5dba58189dbbc));
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],4,Py_ULL(0x3956c25bf348b538));
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],5,Py_ULL(0x59f111f1b605d019));
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],6,Py_ULL(0x923f82a4af194f9b));
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],7,Py_ULL(0xab1c5ed5da6d8118));
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],8,Py_ULL(0xd807aa98a3030242));
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],9,Py_ULL(0x12835b0145706fbe));
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],10,Py_ULL(0x243185be4ee4b28c));
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],11,Py_ULL(0x550c7dc3d5ffb4e2));
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],12,Py_ULL(0x72be5d74f27b896f));
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],13,Py_ULL(0x80deb1fe3b1696b1));
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],14,Py_ULL(0x9bdc06a725c71235));
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],15,Py_ULL(0xc19bf174cf692694));
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,Py_ULL(0xe49b69c19ef14ad2));
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,Py_ULL(0xefbe4786384f25e3));
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,Py_ULL(0x0fc19dc68b8cd5b5));
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,Py_ULL(0x240ca1cc77ac9c65));
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,Py_ULL(0x2de92c6f592b0275));
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,Py_ULL(0x4a7484aa6ea6e483));
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,Py_ULL(0x5cb0a9dcbd41fbd4));
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,Py_ULL(0x76f988da831153b5));
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,Py_ULL(0x983e5152ee66dfab));
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,Py_ULL(0xa831c66d2db43210));
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,Py_ULL(0xb00327c898fb213f));
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,Py_ULL(0xbf597fc7beef0ee4));
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,Py_ULL(0xc6e00bf33da88fc2));
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,Py_ULL(0xd5a79147930aa725));
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,Py_ULL(0x06ca6351e003826f));
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,Py_ULL(0x142929670a0e6e70));
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,Py_ULL(0x27b70a8546d22ffc));
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,Py_ULL(0x2e1b21385c26c926));
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,Py_ULL(0x4d2c6dfc5ac42aed));
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,Py_ULL(0x53380d139d95b3df));
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,Py_ULL(0x650a73548baf63de));
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,Py_ULL(0x766a0abb3c77b2a8));
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,Py_ULL(0x81c2c92e47edaee6));
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,Py_ULL(0x92722c851482353b));
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,Py_ULL(0xa2bfe8a14cf10364));
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,Py_ULL(0xa81a664bbc423001));
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,Py_ULL(0xc24b8b70d0f89791));
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,Py_ULL(0xc76c51a30654be30));
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,Py_ULL(0xd192e819d6ef5218));
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,Py_ULL(0xd69906245565a910));
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,Py_ULL(0xf40e35855771202a));
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,Py_ULL(0x106aa07032bbd1b8));
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,Py_ULL(0x19a4c116b8d2d0c8));
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,Py_ULL(0x1e376c085141ab53));
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,Py_ULL(0x2748774cdf8eeb99));
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,Py_ULL(0x34b0bcb5e19b48a8));
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,Py_ULL(0x391c0cb3c5c95a63));
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,Py_ULL(0x4ed8aa4ae3418acb));
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,Py_ULL(0x5b9cca4f7763e373));
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,Py_ULL(0x682e6ff3d6b2b8a3));
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,Py_ULL(0x748f82ee5defb2fc));
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,Py_ULL(0x78a5636f43172f60));
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,Py_ULL(0x84c87814a1f0ab72));
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,Py_ULL(0x8cc702081a6439ec));
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,Py_ULL(0x90befffa23631e28));
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,Py_ULL(0xa4506cebde82bde9));
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,Py_ULL(0xbef9a3f7b2c67915));
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,Py_ULL(0xc67178f2e372532b));
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],64,Py_ULL(0xca273eceea26619c));
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],65,Py_ULL(0xd186b8c721c0c207));
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],66,Py_ULL(0xeada7dd6cde0eb1e));
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],67,Py_ULL(0xf57d4f7fee6ed178));
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],68,Py_ULL(0x06f067aa72176fba));
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],69,Py_ULL(0x0a637dc5a2c898a6));
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],70,Py_ULL(0x113f9804bef90dae));
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],71,Py_ULL(0x1b710b35131c471b));
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],72,Py_ULL(0x28db77f523047d84));
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],73,Py_ULL(0x32caab7b40c72493));
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],74,Py_ULL(0x3c9ebe0a15c9bebc));
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],75,Py_ULL(0x431d67c49c100d4c));
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],76,Py_ULL(0x4cc5d4becb3e42b6));
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],77,Py_ULL(0x597f299cfc657e2a));
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],78,Py_ULL(0x5fcb6fab3ad6faec));
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],79,Py_ULL(0x6c44198c4a475817));
#undef RND
/* feedback */
/* Add the compressed working state back into the chaining value. */
for (i = 0; i < 8; i++) {
sha_info->digest[i] = sha_info->digest[i] + S[i];
}
}
/* initialize the SHA digest */

/* Put `sha_info` into the starting state for a SHA-512 computation: record
 * the host byte order, load the eight initial chaining words, clear the bit
 * counters and the pending-byte count, and set the digest size to 64. */
static void
sha512_init(SHAobject *sha_info)
{
    static const SHA_INT64 initial_state[8] = {
        Py_ULL(0x6a09e667f3bcc908),
        Py_ULL(0xbb67ae8584caa73b),
        Py_ULL(0x3c6ef372fe94f82b),
        Py_ULL(0xa54ff53a5f1d36f1),
        Py_ULL(0x510e527fade682d1),
        Py_ULL(0x9b05688c2b3e6c1f),
        Py_ULL(0x1f83d9abfb41bd6b),
        Py_ULL(0x5be0cd19137e2179)
    };
    int word;

    TestEndianness(sha_info->Endianness)

    for (word = 0; word < 8; ++word) {
        sha_info->digest[word] = initial_state[word];
    }

    sha_info->count_lo = 0L;
    sha_info->count_hi = 0L;
    sha_info->local = 0;
    sha_info->digestsize = 64;
}
/* Put `sha_info` into the starting state for a SHA-384 computation.  Same
 * procedure as sha512_init, but with a different set of initial chaining
 * words and a digest size of 48. */
static void
sha384_init(SHAobject *sha_info)
{
    static const SHA_INT64 initial_state[8] = {
        Py_ULL(0xcbbb9d5dc1059ed8),
        Py_ULL(0x629a292a367cd507),
        Py_ULL(0x9159015a3070dd17),
        Py_ULL(0x152fecd8f70e5939),
        Py_ULL(0x67332667ffc00b31),
        Py_ULL(0x8eb44a8768581511),
        Py_ULL(0xdb0c2e0d64f98fa7),
        Py_ULL(0x47b5481dbefa4fa4)
    };
    int word;

    TestEndianness(sha_info->Endianness)

    for (word = 0; word < 8; ++word) {
        sha_info->digest[word] = initial_state[word];
    }

    sha_info->count_lo = 0L;
    sha_info->count_hi = 0L;
    sha_info->local = 0;
    sha_info->digestsize = 48;
}
/* update the SHA digest */

/* Absorb `count` bytes starting at `buffer` into the running hash state.
 *
 * The 64-bit message length in bits, kept in count_hi:count_lo, is advanced
 * first.  The input is then pushed through sha512_transform() one
 * SHA_BLOCKSIZE-byte block at a time; a trailing partial block is parked in
 * sha_info->data with its length in sha_info->local until a later call (or
 * sha512_final) completes it.
 *
 * `count` is a size_t rather than an int because the callers in this file
 * pass Py_buffer.len, a Py_ssize_t.  An int parameter silently truncates
 * inputs of 2 GiB or more, possibly to a negative value that would then be
 * handed to memcpy().
 */
static void
sha512_update(SHAobject *sha_info, SHA_BYTE *buffer, size_t count)
{
    size_t i;
    SHA_INT32 clo;

    /* Add count * 8 to the bit counter, 32 bits at a time. */
    clo = sha_info->count_lo + ((SHA_INT32) count << 3);
    if (clo < sha_info->count_lo) {
        ++sha_info->count_hi;           /* carry out of the low word */
    }
    sha_info->count_lo = clo;
    /* Shift the full-width count before narrowing, so that bits of the
       byte count above bit 31 still reach the high word. */
    sha_info->count_hi += (SHA_INT32) (count >> 29);

    /* Top up a partially filled block left over from an earlier call. */
    if (sha_info->local) {
        i = (size_t) (SHA_BLOCKSIZE - sha_info->local);
        if (i > count) {
            i = count;
        }
        memcpy(((SHA_BYTE *) sha_info->data) + sha_info->local, buffer, i);
        count -= i;
        buffer += i;
        sha_info->local += (int) i;     /* i <= SHA_BLOCKSIZE */
        if (sha_info->local == SHA_BLOCKSIZE) {
            sha512_transform(sha_info);
        }
        else {
            /* Still not a full block: all input has been consumed. */
            return;
        }
    }

    /* Process every complete block directly from the input. */
    while (count >= SHA_BLOCKSIZE) {
        memcpy(sha_info->data, buffer, SHA_BLOCKSIZE);
        buffer += SHA_BLOCKSIZE;
        count -= SHA_BLOCKSIZE;
        sha512_transform(sha_info);
    }

    /* Keep the remaining tail (fewer than SHA_BLOCKSIZE bytes). */
    memcpy(sha_info->data, buffer, count);
    sha_info->local = (int) count;
}
/* finish computing the SHA digest */

/* Pad the pending data, append the message length, run the final
 * compression(s), and write the resulting state to `digest`.
 *
 *   digest   - receives SHA_DIGESTSIZE bytes, each state word most
 *              significant byte first
 *   sha_info - hashing state; it is modified, so callers that want to keep
 *              hashing pass a scratch copy
 */
static void
sha512_final(unsigned char digest[SHA_DIGESTSIZE], SHAobject *sha_info)
{
    const SHA_INT32 bits_lo = sha_info->count_lo;
    const SHA_INT32 bits_hi = sha_info->count_hi;
    SHA_BYTE *block = (SHA_BYTE *) sha_info->data;
    int used, word, byte;

    /* Bytes already waiting in the block, derived from the bit count. */
    used = (int) ((bits_lo >> 3) & 0x7f);
    block[used++] = 0x80;

    if (used > SHA_BLOCKSIZE - 16) {
        /* No room left for the 16-byte length field: flush this block and
           start an all-zero one. */
        memset(block + used, 0, SHA_BLOCKSIZE - used);
        sha512_transform(sha_info);
        memset(block, 0, SHA_BLOCKSIZE - 16);
    }
    else {
        memset(block + used, 0, SHA_BLOCKSIZE - 16 - used);
    }

    /* GJS: note that we add the hi/lo in big-endian. sha512_transform will
       swap these values into host-order. */
    /* Only a 64-bit count is tracked, so bytes 112..119 of the length
       field are always zero. */
    memset(block + 112, 0, 8);
    for (byte = 0; byte < 4; ++byte) {
        block[120 + byte] = (bits_hi >> (24 - 8 * byte)) & 0xff;
        block[124 + byte] = (bits_lo >> (24 - 8 * byte)) & 0xff;
    }
    sha512_transform(sha_info);

    /* Serialise the eight 64-bit state words. */
    for (word = 0; word < 8; ++word) {
        const SHA_INT64 state = sha_info->digest[word];

        for (byte = 0; byte < 8; ++byte) {
            digest[8 * word + byte] =
                (unsigned char) ((state >> (56 - 8 * byte)) & 0xff);
        }
    }
}
/*
* End of copied SHA code.
*
* ------------------------------------------------------------------------
*/
/* Tentative declarations: the allocation helpers and SHA512_copy below
   refer to the type objects before their full definitions appear. */
static PyTypeObject SHA384type;
static PyTypeObject SHA512type;
/* Allocate an uninitialised SHA-384 hash object.  Returns whatever
 * PyObject_New returns, which is NULL on failure; the caller is expected to
 * initialise the hashing state. */
static SHAobject *
newSHA384object(void)
{
    SHAobject *fresh = (SHAobject *)PyObject_New(SHAobject, &SHA384type);

    return fresh;
}
/* Allocate an uninitialised SHA-512 hash object.  Returns whatever
 * PyObject_New returns, which is NULL on failure; the caller is expected to
 * initialise the hashing state. */
static SHAobject *
newSHA512object(void)
{
    SHAobject *fresh = (SHAobject *)PyObject_New(SHAobject, &SHA512type);

    return fresh;
}
/* Internal methods for a hash object */

/* tp_dealloc slot for both hash types.  The object's memory is simply
 * handed to PyObject_Del; no other cleanup is performed here. */
static void
SHA512_dealloc(PyObject *ptr)
{
    PyObject_Del(ptr);
}
/* External methods for a hash object */
PyDoc_STRVAR(SHA512_copy__doc__, "Return a copy of the hash object.");

/* copy() method: allocate a new object of the same concrete type as `self`
 * (SHA-512 when self's type is SHA512type, SHA-384 otherwise) and duplicate
 * the hashing state into it.  Returns NULL if allocation fails. */
static PyObject *
SHA512_copy(SHAobject *self, PyObject *unused)
{
    SHAobject *duplicate;

    if (((PyObject*)self)->ob_type == &SHA512type) {
        duplicate = newSHA512object();
    }
    else {
        duplicate = newSHA384object();
    }
    if (duplicate == NULL) {
        return NULL;
    }

    SHAcopy(self, duplicate);
    return (PyObject *)duplicate;
}
PyDoc_STRVAR(SHA512_digest__doc__,
"Return the digest value as a string of binary data.");

/* digest() method: finalise a scratch copy of the state, so that `self`
 * can keep receiving data, and return the first self->digestsize bytes of
 * the result as a Python string. */
static PyObject *
SHA512_digest(SHAobject *self, PyObject *unused)
{
    SHAobject scratch;
    unsigned char raw[SHA_DIGESTSIZE];

    SHAcopy(self, &scratch);
    sha512_final(raw, &scratch);

    return PyString_FromStringAndSize((const char *)raw, self->digestsize);
}
PyDoc_STRVAR(SHA512_hexdigest__doc__,
"Return the digest value as a string of hexadecimal digits.");

/* hexdigest() method: like digest(), but the result is rendered as
 * lowercase hexadecimal, two characters per digest byte.  Works on a
 * scratch copy of the state.  Returns NULL if the result string cannot be
 * created. */
static PyObject *
SHA512_hexdigest(SHAobject *self, PyObject *unused)
{
    static const char hex_chars[] = "0123456789abcdef";
    unsigned char raw[SHA_DIGESTSIZE];
    SHAobject scratch;
    PyObject *result;
    char *out;
    int pos;

    /* Get the raw (binary) digest value */
    SHAcopy(self, &scratch);
    sha512_final(raw, &scratch);

    /* Create a new string of the final size and fill it in place */
    result = PyString_FromStringAndSize(NULL, self->digestsize * 2);
    if (result == NULL) {
        return NULL;
    }
    out = PyString_AsString(result);
    if (out == NULL) {
        Py_DECREF(result);
        return NULL;
    }

    /* High nibble first, then low nibble, for every digest byte */
    for (pos = 0; pos < self->digestsize; ++pos) {
        *out++ = hex_chars[(raw[pos] >> 4) & 0xf];
        *out++ = hex_chars[raw[pos] & 0xf];
    }
    return result;
}
PyDoc_STRVAR(SHA512_update__doc__,
"Update this hash object's state with the provided string.");

/* update(string) method: feed the bytes of a buffer-compatible argument
 * into the running hash.
 *
 * Returns None on success, or NULL (with the exception left by
 * PyArg_ParseTuple) when the argument cannot be parsed as a buffer. */
static PyObject *
SHA512_update(SHAobject *self, PyObject *args)
{
    Py_buffer view;

    if (PyArg_ParseTuple(args, "s*:update", &view) == 0) {
        return NULL;
    }

    sha512_update(self, view.buf, view.len);

    /* A buffer obtained through "s*" must always be released. */
    PyBuffer_Release(&view);

    Py_RETURN_NONE;
}
/* Method table shared by both hash types defined in this module (it is
 * installed as tp_methods of SHA384type and SHA512type).  Only update()
 * takes an argument tuple; the other methods are METH_NOARGS. */
static PyMethodDef SHA_methods[] = {
{"copy", (PyCFunction)SHA512_copy, METH_NOARGS, SHA512_copy__doc__},
{"digest", (PyCFunction)SHA512_digest, METH_NOARGS, SHA512_digest__doc__},
{"hexdigest", (PyCFunction)SHA512_hexdigest, METH_NOARGS, SHA512_hexdigest__doc__},
{"update", (PyCFunction)SHA512_update, METH_VARARGS, SHA512_update__doc__},
{NULL, NULL} /* sentinel */
};
/* Getter for the "block_size" attribute: returns SHA_BLOCKSIZE as a Python
 * int.  Neither the object nor the closure is consulted. */
static PyObject *
SHA512_get_block_size(PyObject *self, void *closure)
{
    const long block_bytes = SHA_BLOCKSIZE;

    return PyInt_FromLong(block_bytes);
}
/* Getter for the "name" attribute.  The object's digest size selects the
 * label: a 64-byte digest reports "SHA512", anything else "SHA384".
 * Returns a new Python string, or NULL if it cannot be created. */
static PyObject *
SHA512_get_name(PyObject *self, void *closure)
{
    const SHAobject *hash = (const SHAobject *)self;
    const char *label = (hash->digestsize == 64) ? "SHA512" : "SHA384";

    return PyString_FromStringAndSize(label, 6);
}
/* Computed attributes shared by both hash types.  Each entry supplies a
 * getter only; the setter, doc string and closure slots are NULL. */
static PyGetSetDef SHA_getseters[] = {
{"block_size",
(getter)SHA512_get_block_size, NULL,
NULL,
NULL},
{"name",
(getter)SHA512_get_name, NULL,
NULL,
NULL},
{NULL} /* Sentinel */
};
/* Read-only data attributes exposed directly from the SHAobject struct.
 * Both names below map to the same field, digestsize. */
static PyMemberDef SHA_members[] = {
{"digest_size", T_INT, offsetof(SHAobject, digestsize), READONLY, NULL},
/* the old md5 and sha modules support 'digest_size' as in PEP 247.
* the old sha module also supported 'digestsize'. ugh. */
{"digestsize", T_INT, offsetof(SHAobject, digestsize), READONLY, NULL},
{NULL} /* Sentinel */
};
/* Type object for SHA-384 hash objects.
 *
 * The initializer is positional, so every slot up to tp_getset must stay in
 * PyTypeObject declaration order; slots after tp_getset are left to C's
 * implicit zero-initialization.  The type pointer is passed as NULL here and
 * assigned in init_sha512() before PyType_Ready() is called. */
static PyTypeObject SHA384type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_sha512.sha384", /*tp_name*/
sizeof(SHAobject), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
SHA512_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT, /*tp_flags*/
0, /*tp_doc*/
0, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
SHA_methods, /* tp_methods */
SHA_members, /* tp_members */
SHA_getseters, /* tp_getset */
};
/* Type object for SHA-512 hash objects.
 *
 * Identical to SHA384type apart from tp_name: both types share the same
 * instance struct, deallocator, and method/member/getset tables.  The
 * initializer is positional, so the slot order must not be changed; slots
 * after tp_getset are implicitly zero.  The type pointer is passed as NULL
 * here and assigned in init_sha512() before PyType_Ready() is called. */
static PyTypeObject SHA512type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_sha512.sha512", /*tp_name*/
sizeof(SHAobject), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
SHA512_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT, /*tp_flags*/
0, /*tp_doc*/
0, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
SHA_methods, /* tp_methods */
SHA_members, /* tp_members */
SHA_getseters, /* tp_getset */
};
/* The single module-level function: new() */
PyDoc_STRVAR(SHA512_new__doc__,
"Return a new SHA-512 hash object; optionally initialized with a string.");

/* Module-level constructor, exported as "sha512".
 *
 * Accepts one optional argument, "string" (positional or keyword), parsed
 * with the "s*" buffer format.  A fresh object is allocated, initialised
 * with sha512_init(), and, when the supplied buffer is non-empty, fed that
 * data.  Returns the new object, or NULL on failure.  The parsed buffer is
 * released on every path that follows a successful parse. */
static PyObject *
SHA512_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
    static char *kwlist[] = {"string", NULL};
    SHAobject *hasher;
    Py_buffer data = { 0 };

    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s*:new", kwlist,
                                     &data)) {
        return NULL;
    }

    hasher = newSHA512object();
    if (hasher != NULL) {
        sha512_init(hasher);
        if (PyErr_Occurred()) {
            /* Drop the half-built object and report the pending error. */
            Py_DECREF(hasher);
            hasher = NULL;
        }
        else if (data.len > 0) {
            sha512_update(hasher, data.buf, data.len);
        }
    }

    PyBuffer_Release(&data);
    return (PyObject *)hasher;
}
PyDoc_STRVAR(SHA384_new__doc__,
"Return a new SHA-384 hash object; optionally initialized with a string.");

/* Module-level constructor, exported as "sha384".
 *
 * Mirrors SHA512_new but allocates through newSHA384object() and
 * initialises with sha384_init().  Returns the new object, or NULL on
 * failure.  The parsed buffer is released on every path that follows a
 * successful parse. */
static PyObject *
SHA384_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
    static char *kwlist[] = {"string", NULL};
    SHAobject *hasher;
    Py_buffer data = { 0 };

    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s*:new", kwlist,
                                     &data)) {
        return NULL;
    }

    hasher = newSHA384object();
    if (hasher != NULL) {
        sha384_init(hasher);
        if (PyErr_Occurred()) {
            /* Drop the half-built object and report the pending error. */
            Py_DECREF(hasher);
            hasher = NULL;
        }
        else if (data.len > 0) {
            sha512_update(hasher, data.buf, data.len);
        }
    }

    PyBuffer_Release(&data);
    return (PyObject *)hasher;
}
/* List of functions exported by this module */
/* Both constructors accept positional and keyword arguments, hence
 * METH_VARARGS|METH_KEYWORDS.  The table is handed to Py_InitModule() in
 * init_sha512(). */
static struct PyMethodDef SHA_functions[] = {
{"sha512", (PyCFunction)SHA512_new, METH_VARARGS|METH_KEYWORDS, SHA512_new__doc__},
{"sha384", (PyCFunction)SHA384_new, METH_VARARGS|METH_KEYWORDS, SHA384_new__doc__},
{NULL, NULL} /* Sentinel */
};
/* Initialize this module. */
#define insint(n,v) { PyModule_AddIntConstant(m,n,v); }
PyMODINIT_FUNC
init_sha512(void)
{
PyObject *m;
Py_TYPE(&SHA384type) = &PyType_Type;
if (PyType_Ready(&SHA384type) < 0)
return;
Py_TYPE(&SHA512type) = &PyType_Type;
if (PyType_Ready(&SHA512type) < 0)
return;
m = Py_InitModule("_sha512", SHA_functions);
if (m == NULL)
return;
}
#endif

View File

@ -0,0 +1,623 @@
/* SHA module */
/* This module provides an interface to NIST's Secure Hash Algorithm */
/* See below for information about the original code this module was
based upon. Additional work performed by:
Andrew Kuchling (amk@amk.ca)
Greg Stein (gstein@lyra.org)
Copyright (C) 2005 Gregory P. Smith (greg@krypto.org)
Licensed to PSF under a Contributor Agreement.
*/
/* SHA objects */
#include "Python.h"
#include "structmember.h"
/* Endianness testing and definitions */
/* TestEndianness(variable) is a run-time probe: it stores the int value 1,
   inspects its first byte in memory, and assigns PCT_LITTLE_ENDIAN to
   `variable` when that byte is 1, PCT_BIG_ENDIAN otherwise.  The expansion
   is a brace-enclosed block. */
#define TestEndianness(variable) {int i=1; variable=PCT_BIG_ENDIAN;\
if (*((char*)&i)==1) variable=PCT_LITTLE_ENDIAN;}
#define PCT_LITTLE_ENDIAN 1
#define PCT_BIG_ENDIAN 0
/* Some useful types */
typedef unsigned char SHA_BYTE;
/* SHA_INT32 exists only when int is exactly 4 bytes; on any other platform
   it is left undefined and later uses fail to compile. */
#if SIZEOF_INT == 4
typedef unsigned int SHA_INT32; /* 32-bit integer */
#else
/* not defined. compilation will die. */
#endif
/* The SHA block size and message digest sizes, in bytes */
#define SHA_BLOCKSIZE 64
#define SHA_DIGESTSIZE 20
/* The structure for storing SHS info */
/* Instance layout of a hash object: the Python object header followed by
   the hashing state. */
typedef struct {
PyObject_HEAD
SHA_INT32 digest[5]; /* Message digest */
SHA_INT32 count_lo, count_hi; /* 64-bit bit count */
SHA_BYTE data[SHA_BLOCKSIZE]; /* SHA data buffer */
int Endianness; /* host byte order: PCT_LITTLE_ENDIAN or PCT_BIG_ENDIAN */
int local; /* unprocessed amount in data */
} SHAobject;
/* When run on a little-endian CPU we need to perform byte reversal on an
   array of longwords. */

/* Reverse the byte order of each 32-bit word in `buffer`, in place.
 *
 *   buffer     - words to convert
 *   byteCount  - size of the region in bytes (converted to a word count)
 *   Endianness - host byte order; nothing is done for PCT_BIG_ENDIAN
 */
static void longReverse(SHA_INT32 *buffer, int byteCount, int Endianness)
{
    if (Endianness == PCT_BIG_ENDIAN) {
        return;
    }

    byteCount /= sizeof(*buffer);
    for (; byteCount != 0; --byteCount, ++buffer) {
        const SHA_INT32 word = *buffer;

        /* Move each of the four bytes to its mirrored position. */
        *buffer = (word << 24)
                | ((word & 0x0000FF00) << 8)
                | ((word >> 8) & 0x0000FF00)
                | (word >> 24);
    }
}
/* Copy the complete hashing state (chaining values, pending input, bit
 * counters and byte-order flag) from `src` into `dest`.  The Python object
 * header of `dest` is left untouched. */
static void SHAcopy(SHAobject *src, SHAobject *dest)
{
    /* Array members first ... */
    memcpy(dest->data, src->data, sizeof(src->data));
    memcpy(dest->digest, src->digest, sizeof(src->digest));

    /* ... then the scalar bookkeeping fields. */
    dest->count_hi = src->count_hi;
    dest->count_lo = src->count_lo;
    dest->local = src->local;
    dest->Endianness = src->Endianness;
}
/* ------------------------------------------------------------------------
*
* This code for the SHA algorithm was noted as public domain. The original
* headers are pasted below.
*
* Several changes have been made to make it more compatible with the
* Python environment and desired interface.
*
*/
/* NIST Secure Hash Algorithm */
/* heavily modified by Uwe Hollerbach <uh@alumni.caltech edu> */
/* from Peter C. Gutmann's implementation as found in */
/* Applied Cryptography by Bruce Schneier */
/* Further modifications to include the "UNRAVEL" stuff, below */
/* This code is in the public domain */
/* UNRAVEL should be fastest & biggest */
/* UNROLL_LOOPS should be just as big, but slightly slower */
/* both undefined should be smallest and slowest */
/* Selects the fully unraveled round sequence in sha_transform (the FA..FT
   macros below). */
#define UNRAVEL
/* #define UNROLL_LOOPS */
/* The SHA f()-functions. The f1 and f3 functions can be optimized to
save one boolean operation each - thanks to Rich Schroeppel,
rcs@cs.arizona.edu for discovering this */
/*#define f1(x,y,z) ((x & y) | (~x & z)) // Rounds 0-19 */
#define f1(x,y,z) (z ^ (x & (y ^ z))) /* Rounds 0-19 */
#define f2(x,y,z) (x ^ y ^ z) /* Rounds 20-39 */
/*#define f3(x,y,z) ((x & y) | (x & z) | (y & z)) // Rounds 40-59 */
#define f3(x,y,z) ((x & y) | (z & (x | y))) /* Rounds 40-59 */
#define f4(x,y,z) (x ^ y ^ z) /* Rounds 60-79 */
/* SHA constants */
#define CONST1 0x5a827999L /* Rounds 0-19 */
#define CONST2 0x6ed9eba1L /* Rounds 20-39 */
#define CONST3 0x8f1bbcdcL /* Rounds 40-59 */
#define CONST4 0xca62c1d6L /* Rounds 60-79 */
/* 32-bit rotate */
/* Note: the macro arguments are not parenthesised in the expansion, so
   R32 should only be given simple operands (a variable or array element). */
#define R32(x,n) ((x << n) | (x >> (32 - n)))
/* the generic case, for when the overall rotation is not unraveled */
/* Each round macro takes the round-group number n (1..4), which selects
   f<n> and CONST<n> by token pasting, and consumes one schedule word
   through *WP++. */
#define FG(n) \
T = R32(A,5) + f##n(B,C,D) + E + *WP++ + CONST##n; \
E = D; D = C; C = R32(B,30); B = A; A = T
/* specific cases, for when the overall rotation is unraveled */
/* Instead of shuffling A..E after every round as FG does, the six macros
   below each write the round result into a different variable, so the
   variables rotate roles from one macro to the next. */
#define FA(n) \
T = R32(A,5) + f##n(B,C,D) + E + *WP++ + CONST##n; B = R32(B,30)
#define FB(n) \
E = R32(T,5) + f##n(A,B,C) + D + *WP++ + CONST##n; A = R32(A,30)
#define FC(n) \
D = R32(E,5) + f##n(T,A,B) + C + *WP++ + CONST##n; T = R32(T,30)
#define FD(n) \
C = R32(D,5) + f##n(E,T,A) + B + *WP++ + CONST##n; E = R32(E,30)
#define FE(n) \
B = R32(C,5) + f##n(D,E,T) + A + *WP++ + CONST##n; D = R32(D,30)
#define FT(n) \
A = R32(B,5) + f##n(C,D,E) + T + *WP++ + CONST##n; C = R32(C,30)
/* do SHA transformation
 *
 * Processes the block currently held in sha_info->data and folds the
 * result into sha_info->digest[0..4]. sha_info->data itself is not
 * modified: the function works on a local copy (W).
 */
static void
sha_transform(SHAobject *sha_info)
{
int i;
SHA_INT32 T, A, B, C, D, E, W[80], *WP;
/* Copy the input block into the first words of W and byte-swap it when
   longReverse() decides that is needed for this Endianness. */
memcpy(W, sha_info->data, sizeof(sha_info->data));
longReverse(W, (int)sizeof(sha_info->data), sha_info->Endianness);
/* Expand the message schedule up to 80 words. */
for (i = 16; i < 80; ++i) {
W[i] = W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16];
/* extra rotation fix */
W[i] = R32(W[i], 1);
}
/* Load the running digest into the working variables. */
A = sha_info->digest[0];
B = sha_info->digest[1];
C = sha_info->digest[2];
D = sha_info->digest[3];
E = sha_info->digest[4];
/* WP walks through W; every round macro consumes one word via *WP++. */
WP = W;
#ifdef UNRAVEL
/* 80 rounds, 20 per f-function/constant. The macro sequence cycles
   FA,FB,FC,FD,FE,FT continuously across the group boundaries. */
FA(1); FB(1); FC(1); FD(1); FE(1); FT(1); FA(1); FB(1); FC(1); FD(1);
FE(1); FT(1); FA(1); FB(1); FC(1); FD(1); FE(1); FT(1); FA(1); FB(1);
FC(2); FD(2); FE(2); FT(2); FA(2); FB(2); FC(2); FD(2); FE(2); FT(2);
FA(2); FB(2); FC(2); FD(2); FE(2); FT(2); FA(2); FB(2); FC(2); FD(2);
FE(3); FT(3); FA(3); FB(3); FC(3); FD(3); FE(3); FT(3); FA(3); FB(3);
FC(3); FD(3); FE(3); FT(3); FA(3); FB(3); FC(3); FD(3); FE(3); FT(3);
FA(4); FB(4); FC(4); FD(4); FE(4); FT(4); FA(4); FB(4); FC(4); FD(4);
FE(4); FT(4); FA(4); FB(4); FC(4); FD(4); FE(4); FT(4); FA(4); FB(4);
/* Because the variable roles rotated instead of the values, the results
   end up in E,T,A,B,C rather than A,B,C,D,E. */
sha_info->digest[0] += E;
sha_info->digest[1] += T;
sha_info->digest[2] += A;
sha_info->digest[3] += B;
sha_info->digest[4] += C;
#else /* !UNRAVEL */
#ifdef UNROLL_LOOPS
FG(1); FG(1); FG(1); FG(1); FG(1); FG(1); FG(1); FG(1); FG(1); FG(1);
FG(1); FG(1); FG(1); FG(1); FG(1); FG(1); FG(1); FG(1); FG(1); FG(1);
FG(2); FG(2); FG(2); FG(2); FG(2); FG(2); FG(2); FG(2); FG(2); FG(2);
FG(2); FG(2); FG(2); FG(2); FG(2); FG(2); FG(2); FG(2); FG(2); FG(2);
FG(3); FG(3); FG(3); FG(3); FG(3); FG(3); FG(3); FG(3); FG(3); FG(3);
FG(3); FG(3); FG(3); FG(3); FG(3); FG(3); FG(3); FG(3); FG(3); FG(3);
FG(4); FG(4); FG(4); FG(4); FG(4); FG(4); FG(4); FG(4); FG(4); FG(4);
FG(4); FG(4); FG(4); FG(4); FG(4); FG(4); FG(4); FG(4); FG(4); FG(4);
#else /* !UNROLL_LOOPS */
for (i = 0; i < 20; ++i) { FG(1); }
for (i = 20; i < 40; ++i) { FG(2); }
for (i = 40; i < 60; ++i) { FG(3); }
for (i = 60; i < 80; ++i) { FG(4); }
#endif /* !UNROLL_LOOPS */
/* FG() shuffles the values, so the results are in A..E here. */
sha_info->digest[0] += A;
sha_info->digest[1] += B;
sha_info->digest[2] += C;
sha_info->digest[3] += D;
sha_info->digest[4] += E;
#endif /* !UNRAVEL */
}
/* Reset `sha_info` to the state of a hash that has consumed no input:
   initial digest words, zero bit counters, empty block buffer. */
static void
sha_init(SHAobject *sha_info)
{
    /* Fills in sha_info->Endianness (macro defined elsewhere in this file;
       presumably a host byte-order probe -- it expands to a full statement,
       hence no trailing semicolon). */
    TestEndianness(sha_info->Endianness)

    /* No bytes buffered, no bits counted. */
    sha_info->local = 0;
    sha_info->count_hi = 0L;
    sha_info->count_lo = 0L;

    /* Initial digest words. */
    sha_info->digest[4] = 0xc3d2e1f0L;
    sha_info->digest[3] = 0x10325476L;
    sha_info->digest[2] = 0x98badcfeL;
    sha_info->digest[1] = 0xefcdab89L;
    sha_info->digest[0] = 0x67452301L;
}
/* Feed `count` bytes from `buffer` into the hash.
 *
 * The 64-bit message length (in bits) is kept in count_hi:count_lo.
 * Bytes that do not fill a whole block are parked in sha_info->data and
 * their number is remembered in sha_info->local for the next call.
 */
static void
sha_update(SHAobject *sha_info, SHA_BYTE *buffer, unsigned int count)
{
    SHA_INT32 new_lo = sha_info->count_lo + ((SHA_INT32) count << 3);
    unsigned int take;

    /* Carry into the high word if the low word wrapped around. */
    if (new_lo < sha_info->count_lo)
        sha_info->count_hi += 1;
    sha_info->count_lo = new_lo;
    /* The top three bits of `count` were shifted out above; add them here. */
    sha_info->count_hi += (SHA_INT32) count >> 29;

    /* First top up a partially filled block left over from a previous call. */
    if (sha_info->local != 0) {
        take = SHA_BLOCKSIZE - sha_info->local;
        if (count < take)
            take = count;
        memcpy(((SHA_BYTE *) sha_info->data) + sha_info->local, buffer, take);
        buffer += take;
        count -= take;
        sha_info->local += take;
        if (sha_info->local != SHA_BLOCKSIZE)
            return;             /* still not a full block */
        sha_transform(sha_info);
    }

    /* Then consume as many whole blocks as are available. */
    for (; count >= SHA_BLOCKSIZE;
         buffer += SHA_BLOCKSIZE, count -= SHA_BLOCKSIZE) {
        memcpy(sha_info->data, buffer, SHA_BLOCKSIZE);
        sha_transform(sha_info);
    }

    /* Park the remainder (possibly zero bytes) for the next call. */
    memcpy(sha_info->data, buffer, count);
    sha_info->local = count;
}
/* Finish the hash: append the 0x80 marker, zero padding and the 64-bit
 * big-endian bit count, run the last transform(s), and write the 20-byte
 * digest (most significant byte of each word first) into `digest`.
 *
 * `sha_info` is modified, so callers that want to keep hashing must pass
 * a copy.
 */
static void
sha_final(unsigned char digest[20], SHAobject *sha_info)
{
    SHA_INT32 lo_bits = sha_info->count_lo;
    SHA_INT32 hi_bits = sha_info->count_hi;
    SHA_BYTE *block = (SHA_BYTE *) sha_info->data;
    int used = (int) ((lo_bits >> 3) & 0x3f);   /* bytes already in block */
    int k;

    block[used++] = 0x80;
    if (used <= SHA_BLOCKSIZE - 8) {
        /* The length field still fits into this block. */
        memset(block + used, 0, SHA_BLOCKSIZE - 8 - used);
    }
    else {
        /* No room for the length: flush this block, start an empty one. */
        memset(block + used, 0, SHA_BLOCKSIZE - used);
        sha_transform(sha_info);
        memset(block, 0, SHA_BLOCKSIZE - 8);
    }

    /* GJS: note that we add the hi/lo in big-endian. sha_transform will
       swap these values into host-order. */
    for (k = 0; k < 4; ++k) {
        int shift = 24 - 8 * k;
        sha_info->data[56 + k] = (hi_bits >> shift) & 0xff;
        sha_info->data[60 + k] = (lo_bits >> shift) & 0xff;
    }
    sha_transform(sha_info);

    /* Serialise the five digest words, high byte first. */
    for (k = 0; k < 5; ++k) {
        SHA_INT32 word = sha_info->digest[k];
        unsigned char *out = digest + 4 * k;
        out[0] = (unsigned char) ((word >> 24) & 0xff);
        out[1] = (unsigned char) ((word >> 16) & 0xff);
        out[2] = (unsigned char) ((word >> 8) & 0xff);
        out[3] = (unsigned char) ((word ) & 0xff);
    }
}
/*
* End of copied SHA code.
*
* ------------------------------------------------------------------------
*/
static PyTypeObject SHAtype;

/* Allocate a SHAobject of type SHAtype. The hashing state is NOT
   initialised here. Returns NULL if the allocation fails. */
static SHAobject *
newSHAobject(void)
{
    SHAobject *fresh = (SHAobject *)PyObject_New(SHAobject, &SHAtype);

    return fresh;
}
/* Internal methods for a hashing object */
/* tp_dealloc slot: the object holds no references or external resources
   visible here, so releasing its memory is all that is needed. */
static void
SHA_dealloc(PyObject *ptr)
{
PyObject_Del(ptr);
}
/* External methods for a hashing object */
PyDoc_STRVAR(SHA_copy__doc__, "Return a copy of the hashing object.");

/* copy(): allocate a second hashing object and duplicate the current
   state into it. Returns NULL if the allocation fails. */
static PyObject *
SHA_copy(SHAobject *self, PyObject *unused)
{
    SHAobject *clone = newSHAobject();

    if (clone != NULL)
        SHAcopy(self, clone);
    return (PyObject *)clone;
}
PyDoc_STRVAR(SHA_digest__doc__,
"Return the digest value as a string of binary data.");

/* digest(): finalise a stack copy of the state so that `self` can keep
   receiving update() calls afterwards. */
static PyObject *
SHA_digest(SHAobject *self, PyObject *unused)
{
    SHAobject scratch;
    unsigned char raw[SHA_DIGESTSIZE];

    SHAcopy(self, &scratch);
    sha_final(raw, &scratch);
    return PyString_FromStringAndSize((const char *)raw, sizeof(raw));
}
PyDoc_STRVAR(SHA_hexdigest__doc__,
"Return the digest value as a string of hexadecimal digits.");

/* hexdigest(): like digest(), but returns two lowercase hex characters
   per digest byte. Works on a copy, so `self` is left untouched. */
static PyObject *
SHA_hexdigest(SHAobject *self, PyObject *unused)
{
    static const char hexchars[] = "0123456789abcdef";
    unsigned char raw[SHA_DIGESTSIZE];
    SHAobject scratch;
    PyObject *result;
    char *out;
    size_t k;

    /* Raw (binary) digest of a scratch copy. */
    SHAcopy(self, &scratch);
    sha_final(raw, &scratch);

    /* Uninitialised string of the right size, filled in below. */
    result = PyString_FromStringAndSize(NULL, sizeof(raw) * 2);
    if (result == NULL)
        return NULL;
    out = PyString_AsString(result);
    if (out == NULL) {
        Py_DECREF(result);
        return NULL;
    }

    /* High nibble first, then low nibble. */
    for (k = 0; k < sizeof(raw); ++k) {
        *out++ = hexchars[(raw[k] >> 4) & 0xf];
        *out++ = hexchars[raw[k] & 0xf];
    }
    return result;
}
PyDoc_STRVAR(SHA_update__doc__,
"Update this hashing object's state with the provided string.");

/* update(data): hash the contents of any object accepted by the "s*"
   format. sha_update() takes an unsigned int length, so the buffer is fed
   in pieces of at most INT_MAX bytes. */
static PyObject *
SHA_update(SHAobject *self, PyObject *args)
{
    Py_buffer view;
    Py_ssize_t remaining;
    unsigned char *cursor;

    if (!PyArg_ParseTuple(args, "s*:update", &view))
        return NULL;

    cursor = (unsigned char *) view.buf;
    for (remaining = view.len; remaining > 0; ) {
        Py_ssize_t chunk = (remaining > INT_MAX) ? INT_MAX : remaining;

        sha_update(self, cursor,
                   Py_SAFE_DOWNCAST(chunk, Py_ssize_t, unsigned int));
        cursor += chunk;
        remaining -= chunk;
    }

    PyBuffer_Release(&view);
    Py_RETURN_NONE;
}
/* Method table of the hashing object (installed via tp_methods of
   SHAtype). copy/digest/hexdigest take no arguments; update takes one
   positional argument parsed inside SHA_update(). */
static PyMethodDef SHA_methods[] = {
{"copy", (PyCFunction)SHA_copy, METH_NOARGS, SHA_copy__doc__},
{"digest", (PyCFunction)SHA_digest, METH_NOARGS, SHA_digest__doc__},
{"hexdigest", (PyCFunction)SHA_hexdigest, METH_NOARGS, SHA_hexdigest__doc__},
{"update", (PyCFunction)SHA_update, METH_VARARGS, SHA_update__doc__},
{NULL, NULL} /* sentinel */
};
/* Getter for the read-only `block_size` attribute. */
static PyObject *
SHA_get_block_size(PyObject *self, void *closure)
{
    PyObject *size = PyInt_FromLong(SHA_BLOCKSIZE);

    return size;
}
/* Getter shared by the `digest_size` and `digestsize` attributes. */
static PyObject *
SHA_get_digest_size(PyObject *self, void *closure)
{
    PyObject *size = PyInt_FromLong(SHA_DIGESTSIZE);

    return size;
}
/* Getter for the read-only `name` attribute: always the string "SHA1". */
static PyObject *
SHA_get_name(PyObject *self, void *closure)
{
    static const char algorithm[] = "SHA1";

    /* sizeof counts the terminating NUL, which is not part of the value. */
    return PyString_FromStringAndSize(algorithm, sizeof(algorithm) - 1);
}
/* Attribute table of the hashing object (installed via tp_getset of
   SHAtype). Every entry has a getter and a NULL setter. */
static PyGetSetDef SHA_getseters[] = {
{"digest_size",
(getter)SHA_get_digest_size, NULL,
NULL,
NULL},
{"block_size",
(getter)SHA_get_block_size, NULL,
NULL,
NULL},
{"name",
(getter)SHA_get_name, NULL,
NULL,
NULL},
/* the old md5 and sha modules support 'digest_size' as in PEP 247.
* the old sha module also supported 'digestsize'. ugh. */
{"digestsize",
(getter)SHA_get_digest_size, NULL,
NULL,
NULL},
{NULL} /* Sentinel */
};
/* Type object for the hashing object. ob_type is left NULL here and is
   filled in by init_sha() before PyType_Ready() is called. Only
   tp_dealloc, tp_methods and tp_getset are customised. */
static PyTypeObject SHAtype = {
PyVarObject_HEAD_INIT(NULL, 0)
"_sha.sha", /*tp_name*/
sizeof(SHAobject), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
SHA_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT, /*tp_flags*/
0, /*tp_doc*/
0, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
SHA_methods, /* tp_methods */
0, /* tp_members */
SHA_getseters, /* tp_getset */
};
/* The single module-level function: new() */
PyDoc_STRVAR(SHA_new__doc__,
"Return a new SHA hashing object. An optional string argument\n\
may be provided; if present, this string will be automatically\n\
hashed.");

/* new([string]): create a hashing object and, when `string` is given,
   hash it right away. `view` starts zeroed, so with no argument its
   length is 0 and the feeding loop does not run. */
static PyObject *
SHA_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
    static char *kwlist[] = {"string", NULL};
    SHAobject *hasher = NULL;
    Py_buffer view = { 0 };
    Py_ssize_t remaining;
    unsigned char *cursor;

    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s*:new", kwlist,
                                     &view)) {
        return NULL;
    }

    hasher = newSHAobject();
    if (hasher == NULL)
        goto fail;

    sha_init(hasher);
    if (PyErr_Occurred())
        goto fail;

    /* sha_update() takes an unsigned int length: feed at most INT_MAX
       bytes per call. */
    cursor = (unsigned char *) view.buf;
    for (remaining = view.len; remaining > 0; ) {
        Py_ssize_t chunk = (remaining > INT_MAX) ? INT_MAX : remaining;

        sha_update(hasher, cursor,
                   Py_SAFE_DOWNCAST(chunk, Py_ssize_t, unsigned int));
        cursor += chunk;
        remaining -= chunk;
    }

    PyBuffer_Release(&view);
    return (PyObject *)hasher;

fail:
    /* hasher is still NULL when the allocation itself failed. */
    Py_XDECREF(hasher);
    PyBuffer_Release(&view);
    return NULL;
}
/* List of functions exported by this module.
   new() accepts positional and keyword arguments, hence the
   METH_VARARGS|METH_KEYWORDS flags and the three-argument SHA_new(). */
static struct PyMethodDef SHA_functions[] = {
{"new", (PyCFunction)SHA_new, METH_VARARGS|METH_KEYWORDS, SHA_new__doc__},
{NULL, NULL} /* Sentinel */
};
/* Initialize this module. */
#define insint(n,v) { PyModule_AddIntConstant(m,n,v); }
/* Module initialisation: finish the type object, create the `_sha`
   module and publish its integer constants. Returns early (without
   adding constants) if type or module setup fails. */
PyMODINIT_FUNC
init_sha(void)
{
    PyObject *module;

    Py_TYPE(&SHAtype) = &PyType_Type;
    if (PyType_Ready(&SHAtype) < 0)
        return;

    module = Py_InitModule("_sha", SHA_functions);
    if (module != NULL) {
        /* Add some symbolic constants to the module */

        /* For future use, in case some hash functions require an
           integral number of blocks */
        PyModule_AddIntConstant(module, "blocksize", 1);
        PyModule_AddIntConstant(module, "digestsize", 20);
        PyModule_AddIntConstant(module, "digest_size", 20);
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,251 @@
/* Socket module header file */
/* Includes needed for the sockaddr_* symbols below.
   Non-Windows builds pull in the BSD socket headers; Windows builds use
   Winsock 2 and, when the SDK is recent enough, enable the IPv6 /
   getaddrinfo feature macros. */
#ifndef MS_WINDOWS
#ifdef __VMS
# include <socket.h>
# else
# include <sys/socket.h>
# endif
# include <netinet/in.h>
# if !(defined(__BEOS__) || defined(__CYGWIN__) || (defined(PYOS_OS2) && defined(PYCC_VACPP)))
# include <netinet/tcp.h>
# endif
#else /* MS_WINDOWS */
# include <winsock2.h>
# include <ws2tcpip.h>
/* VC6 is shipped with old platform headers, and does not have MSTcpIP.h
* Separate SDKs have all the functions we want, but older ones don't have
* any version information.
* I use SIO_GET_MULTICAST_FILTER to detect a decent SDK.
*/
# ifdef SIO_GET_MULTICAST_FILTER
# include <MSTcpIP.h> /* for SIO_RCVALL */
# define HAVE_ADDRINFO
# define HAVE_SOCKADDR_STORAGE
# define HAVE_GETADDRINFO
# define HAVE_GETNAMEINFO
# define ENABLE_IPV6
# else
/* Old SDK: socklen_t is not provided, so define it here. */
typedef int socklen_t;
# endif /* SIO_GET_MULTICAST_FILTER */
#endif /* MS_WINDOWS */
#ifdef HAVE_SYS_UN_H
# include <sys/un.h>
#else
# undef AF_UNIX
#endif
#ifdef HAVE_LINUX_NETLINK_H
# ifdef HAVE_ASM_TYPES_H
# include <asm/types.h>
# endif
# include <linux/netlink.h>
#else
# undef AF_NETLINK
#endif
#ifdef HAVE_BLUETOOTH_BLUETOOTH_H
#include <bluetooth/bluetooth.h>
#include <bluetooth/rfcomm.h>
#include <bluetooth/l2cap.h>
#include <bluetooth/sco.h>
#include <bluetooth/hci.h>
#endif
#ifdef HAVE_BLUETOOTH_H
#include <bluetooth.h>
#endif
#ifdef HAVE_NETPACKET_PACKET_H
# include <sys/ioctl.h>
# include <net/if.h>
# include <netpacket/packet.h>
#endif
#ifdef HAVE_LINUX_TIPC_H
# include <linux/tipc.h>
#endif
#ifndef Py__SOCKET_H
#define Py__SOCKET_H
#ifdef __cplusplus
extern "C" {
#endif
/* Python module and C API name */
#define PySocket_MODULE_NAME "_socket"
#define PySocket_CAPI_NAME "CAPI"
#define PySocket_CAPSULE_NAME (PySocket_MODULE_NAME "." PySocket_CAPI_NAME)
/* Abstract the socket file descriptor type.
   SOCKET_T is the native handle type (SOCKET on Windows, int elsewhere)
   and SIZEOF_SOCKET_T is its size in bytes as a preprocessor constant. */
#ifdef MS_WINDOWS
typedef SOCKET SOCKET_T;
# ifdef MS_WIN64
# define SIZEOF_SOCKET_T 8
# else
# define SIZEOF_SOCKET_T 4
# endif
#else
typedef int SOCKET_T;
# define SIZEOF_SOCKET_T SIZEOF_INT
#endif
/* Socket address.
   A union of every sockaddr variant enabled for this build, so that one
   object is large enough to hold any of them. Members other than `in`
   exist only when the corresponding feature macro is defined. */
typedef union sock_addr {
struct sockaddr_in in;
#ifdef AF_UNIX
struct sockaddr_un un;
#endif
#ifdef AF_NETLINK
struct sockaddr_nl nl;
#endif
#ifdef ENABLE_IPV6
struct sockaddr_in6 in6;
struct sockaddr_storage storage;
#endif
#ifdef HAVE_BLUETOOTH_BLUETOOTH_H
struct sockaddr_l2 bt_l2;
struct sockaddr_rc bt_rc;
struct sockaddr_sco bt_sco;
struct sockaddr_hci bt_hci;
#endif
#ifdef HAVE_NETPACKET_PACKET_H
struct sockaddr_ll ll;
#endif
} sock_addr_t;
/* The object holding a socket. It holds some extra information,
like the address family, which is used to decode socket address
arguments properly. */
typedef struct {
PyObject_HEAD
SOCKET_T sock_fd; /* Socket file descriptor */
int sock_family; /* Address family, e.g., AF_INET */
int sock_type; /* Socket type, e.g., SOCK_STREAM */
int sock_proto; /* Protocol type, usually 0 */
PyObject *(*errorhandler)(void); /* Error handler; checks
errno, returns NULL and
sets a Python exception */
double sock_timeout; /* Operation timeout in seconds;
0.0 means non-blocking */
/* NOTE(review): presumably the weak-reference list head used through
   tp_weaklistoffset -- confirm in socketmodule.c. */
PyObject *weakreflist;
} PySocketSockObject;
/* --- C API ----------------------------------------------------*/
/* Short explanation of what this C API export mechanism does
and how it works:
The _ssl module needs access to the type object defined in
the _socket module. Since cross-DLL linking introduces a lot of
problems on many platforms, the "trick" is to wrap the
C API of a module in a struct which then gets exported to
other modules via a PyCapsule.
The code in socketmodule.c defines this struct (which currently
only contains the type object reference, but could very
well also include other C APIs needed by other modules)
and exports it as PyCapsule via the module dictionary
under the name "CAPI".
Other modules can now include the socketmodule.h file
which defines the needed C APIs to import and set up
a static copy of this struct in the importing module.
After initialization, the importing module can then
access the C APIs from the _socket module by simply
referring to the static struct, e.g.
Load _socket module and its C API; this sets up the global
PySocketModule:
if (PySocketModule_ImportModuleAndAPI())
return;
Now use the C API as if it were defined in the using
module:
if (!PyArg_ParseTuple(args, "O!|zz:ssl",
PySocketModule.Sock_Type,
(PyObject*)&Sock,
&key_file, &cert_file))
return NULL;
Support could easily be extended to export more C APIs/symbols
this way. Currently, only the type object is exported,
other candidates would be socket constructors and socket
access functions.
*/
/* C API for usage by other Python modules.
   This is the struct that PySocketModule_ImportModuleAndAPI() copies out
   of the "_socket.CAPI" capsule. */
typedef struct {
PyTypeObject *Sock_Type; /* the socket type object */
PyObject *error; /* NOTE(review): presumably the module's exception object -- confirm in socketmodule.c */
} PySocketModule_APIObject;
/* XXX The net effect of the following appears to be to define a function
XXX named PySocketModule_APIObject in _ssl.c. It's unclear why it isn't
XXX defined there directly.
>>> It's defined here because other modules might also want to use
>>> the C API.
*/
#ifndef PySocket_BUILDING_SOCKET
/* --- C API ----------------------------------------------------*/
/* Interface structure to C API for other modules.
Call PySocketModule_ImportModuleAndAPI() to initialize this
structure. After that usage is simple:
if (!PyArg_ParseTuple(args, "O!|zz:ssl",
PySocketModule.Sock_Type, (PyObject*)&Sock,
&key_file, &cert_file))
return NULL;
...
*/
/* Per-translation-unit copy of the _socket C API; zero-initialised until
   PySocketModule_ImportModuleAndAPI() fills it in. */
static
PySocketModule_APIObject PySocketModule;
/* You *must* call this before using any of the functions in
PySocketModule and check its outcome; otherwise all accesses will
result in a segfault. Returns 0 on success. */
/* Debug trace hook: unless the includer defines DPRINTF itself, the calls
   are compiled but never executed (`if (0) printf(...)`). */
#ifndef DPRINTF
# define DPRINTF if (0) printf
#endif
/* Import the "_socket.CAPI" capsule and copy the API struct it carries
   into the file-local PySocketModule.
   Returns 0 on success, -1 if PyCapsule_Import() returned NULL. */
static
int PySocketModule_ImportModuleAndAPI(void)
{
    void *capi;

    DPRINTF(" Loading capsule %s\n", PySocket_CAPSULE_NAME);
    capi = PyCapsule_Import(PySocket_CAPSULE_NAME, 1);
    if (capi != NULL) {
        memcpy(&PySocketModule, capi, sizeof(PySocketModule));
        DPRINTF(" API object loaded and initialized.\n");
        return 0;
    }

    DPRINTF(" not found.\n");
    return -1;
}
#endif /* !PySocket_BUILDING_SOCKET */
#ifdef __cplusplus
}
#endif
#endif /* !Py__SOCKET_H */

View File

@ -0,0 +1,104 @@
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
*/
#ifndef SRE_INCLUDED
#define SRE_INCLUDED
#include "sre_constants.h"
/* size of a code word (must be unsigned short or larger, and
large enough to hold a UCS4 character) */
/* SRE_MAXREPEAT is the largest repeat count: the all-ones SRE_CODE value
   when size_t is wider than the code word, otherwise PY_SSIZE_T_MAX
   converted to SRE_CODE. */
#ifdef Py_USING_UNICODE
# define SRE_CODE Py_UCS4
# if SIZEOF_SIZE_T > 4
# define SRE_MAXREPEAT (~(SRE_CODE)0)
# else
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# endif
#else
# define SRE_CODE unsigned int
# if SIZEOF_SIZE_T > SIZEOF_INT
# define SRE_MAXREPEAT (~(SRE_CODE)0)
# else
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# endif
#endif
/* Compiled pattern object: group bookkeeping, the original pattern
   source and flags, and the code words themselves.
   `code` is declared with one element; NOTE(review): presumably the
   object is over-allocated so that `code` holds `codesize` words --
   confirm against the allocation in _sre.c. */
typedef struct {
PyObject_VAR_HEAD
Py_ssize_t groups; /* must be first! */
PyObject* groupindex;
PyObject* indexgroup;
/* compatibility */
PyObject* pattern; /* pattern source (or None) */
int flags; /* flags used when compiling pattern source */
PyObject *weakreflist; /* List of weak references */
/* pattern code */
Py_ssize_t codesize;
SRE_CODE code[1];
} PatternObject;
/* Pointer to the first code word of pattern object `o`. */
#define PatternObject_GetCode(o) (((PatternObject*)(o))->code)
/* Match result object: links to the target string and the pattern that
   produced it, plus the slice that was searched and the group marks.
   `mark` is declared with one element; NOTE(review): presumably
   over-allocated to hold the start/end marks of all groups -- confirm in
   _sre.c. */
typedef struct {
PyObject_VAR_HEAD
PyObject* string; /* link to the target string (must be first) */
PyObject* regs; /* cached list of matching spans */
PatternObject* pattern; /* link to the regex (pattern) object */
Py_ssize_t pos, endpos; /* current target slice */
Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
Py_ssize_t groups; /* number of groups (start/end marks) */
Py_ssize_t mark[1];
} MatchObject;
/* Signature of the case-folding callback stored in SRE_STATE.lower. */
typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);
/* FIXME: <fl> shouldn't be a constant, really... */
/* Number of entries in the SRE_STATE.mark array. */
#define SRE_MARK_SIZE 200
/* One repeat context. Contexts are chained through `prev`; the current
   one is referenced from SRE_STATE.repeat. */
typedef struct SRE_REPEAT_T {
Py_ssize_t count;
SRE_CODE* pattern; /* points to REPEAT operator arguments */
void* last_ptr; /* helper to check for infinite loops */
struct SRE_REPEAT_T *prev; /* points to previous repeat context */
} SRE_REPEAT;
/* Matching-engine state: positions in the target string, the mark
   registers, a dynamically allocated data stack, the current repeat
   context and the case-folding hook. */
typedef struct {
/* string pointers */
void* ptr; /* current position (also end of current slice) */
void* beginning; /* start of original string */
void* start; /* start of current slice */
void* end; /* end of original string */
/* attributes for the match object */
PyObject* string;
Py_ssize_t pos, endpos;
/* character size */
int charsize;
/* registers */
Py_ssize_t lastindex;
Py_ssize_t lastmark;
void* mark[SRE_MARK_SIZE];
/* dynamically allocated stuff */
char* data_stack;
size_t data_stack_size;
size_t data_stack_base;
/* current repeat context */
SRE_REPEAT *repeat;
/* hooks */
SRE_TOLOWER_HOOK lower;
} SRE_STATE;
/* Scanner object: a pattern reference together with an embedded (not
   pointed-to) engine state. */
typedef struct {
PyObject_HEAD
PyObject* pattern;
SRE_STATE state;
} ScannerObject;
#endif

View File

@ -0,0 +1,86 @@
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* NOTE: This file is generated by sre_constants.py. If you need
* to change anything in here, edit sre_constants.py and run it.
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
*/
/* Reminder: these values are generated from sre_constants.py (see the
   header comment above); the group comments below are annotations only. */
#define SRE_MAGIC 20031017
/* SRE_OP_*: opcode numbers (0..31). */
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2
#define SRE_OP_ANY_ALL 3
#define SRE_OP_ASSERT 4
#define SRE_OP_ASSERT_NOT 5
#define SRE_OP_AT 6
#define SRE_OP_BRANCH 7
#define SRE_OP_CALL 8
#define SRE_OP_CATEGORY 9
#define SRE_OP_CHARSET 10
#define SRE_OP_BIGCHARSET 11
#define SRE_OP_GROUPREF 12
#define SRE_OP_GROUPREF_EXISTS 13
#define SRE_OP_GROUPREF_IGNORE 14
#define SRE_OP_IN 15
#define SRE_OP_IN_IGNORE 16
#define SRE_OP_INFO 17
#define SRE_OP_JUMP 18
#define SRE_OP_LITERAL 19
#define SRE_OP_LITERAL_IGNORE 20
#define SRE_OP_MARK 21
#define SRE_OP_MAX_UNTIL 22
#define SRE_OP_MIN_UNTIL 23
#define SRE_OP_NOT_LITERAL 24
#define SRE_OP_NOT_LITERAL_IGNORE 25
#define SRE_OP_NEGATE 26
#define SRE_OP_RANGE 27
#define SRE_OP_REPEAT 28
#define SRE_OP_REPEAT_ONE 29
#define SRE_OP_SUBPATTERN 30
#define SRE_OP_MIN_REPEAT_ONE 31
/* SRE_AT_*: position codes (presumably the argument of SRE_OP_AT). */
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
#define SRE_AT_BEGINNING_STRING 2
#define SRE_AT_BOUNDARY 3
#define SRE_AT_NON_BOUNDARY 4
#define SRE_AT_END 5
#define SRE_AT_END_LINE 6
#define SRE_AT_END_STRING 7
#define SRE_AT_LOC_BOUNDARY 8
#define SRE_AT_LOC_NON_BOUNDARY 9
#define SRE_AT_UNI_BOUNDARY 10
#define SRE_AT_UNI_NON_BOUNDARY 11
/* SRE_CATEGORY_*: character category codes (presumably the argument of
   SRE_OP_CATEGORY); plain, LOC_ and UNI_ variants. */
#define SRE_CATEGORY_DIGIT 0
#define SRE_CATEGORY_NOT_DIGIT 1
#define SRE_CATEGORY_SPACE 2
#define SRE_CATEGORY_NOT_SPACE 3
#define SRE_CATEGORY_WORD 4
#define SRE_CATEGORY_NOT_WORD 5
#define SRE_CATEGORY_LINEBREAK 6
#define SRE_CATEGORY_NOT_LINEBREAK 7
#define SRE_CATEGORY_LOC_WORD 8
#define SRE_CATEGORY_LOC_NOT_WORD 9
#define SRE_CATEGORY_UNI_DIGIT 10
#define SRE_CATEGORY_UNI_NOT_DIGIT 11
#define SRE_CATEGORY_UNI_SPACE 12
#define SRE_CATEGORY_UNI_NOT_SPACE 13
#define SRE_CATEGORY_UNI_WORD 14
#define SRE_CATEGORY_UNI_NOT_WORD 15
#define SRE_CATEGORY_UNI_LINEBREAK 16
#define SRE_CATEGORY_UNI_NOT_LINEBREAK 17
/* SRE_FLAG_*: bit flags (each value is a distinct power of two). */
#define SRE_FLAG_TEMPLATE 1
#define SRE_FLAG_IGNORECASE 2
#define SRE_FLAG_LOCALE 4
#define SRE_FLAG_MULTILINE 8
#define SRE_FLAG_DOTALL 16
#define SRE_FLAG_UNICODE 32
#define SRE_FLAG_VERBOSE 64
/* SRE_INFO_*: bit flags (presumably used inside an SRE_OP_INFO block). */
#define SRE_INFO_PREFIX 1
#define SRE_INFO_LITERAL 2
#define SRE_INFO_CHARSET 4

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,86 @@
#include "Python.h"
#include "code.h"
#include "compile.h"
#include "Python-ast.h"
#include "symtable.h"
/* symtable(source, filename, startstr) -> top-level symbol table entry.
 *
 * `startstr` must be "exec", "eval" or "single"; anything else raises
 * ValueError. Returns a new reference to the st_top entry of the symbol
 * table built for `source`, or NULL with an exception set.
 */
static PyObject *
symtable_symtable(PyObject *self, PyObject *args)
{
    static const struct {
        const char *name;
        int token;
    } modes[] = {
        {"exec", Py_file_input},
        {"eval", Py_eval_input},
        {"single", Py_single_input},
    };
    char *source, *filename, *mode;
    struct symtable *table;
    PyObject *top;
    int start = 0;
    int found = 0;
    unsigned int k;

    if (!PyArg_ParseTuple(args, "sss:symtable", &source, &filename,
                          &mode))
        return NULL;

    /* Map the mode string onto the parser start token. */
    for (k = 0; k < sizeof(modes) / sizeof(modes[0]); ++k) {
        if (strcmp(mode, modes[k].name) == 0) {
            start = modes[k].token;
            found = 1;
            break;
        }
    }
    if (!found) {
        PyErr_SetString(PyExc_ValueError,
                        "symtable() arg 3 must be 'exec' or 'eval' or 'single'");
        return NULL;
    }

    table = Py_SymtableString(source, filename, start);
    if (table == NULL)
        return NULL;

    /* Keep the top entry alive past the release of the table itself. */
    top = (PyObject *)table->st_top;
    Py_INCREF(top);
    PyMem_Free((void *)table->st_future);
    PySymtable_Free(table);
    return top;
}
/* Functions exported by the _symtable module: only symtable(). */
static PyMethodDef symtable_methods[] = {
{"symtable", symtable_symtable, METH_VARARGS,
PyDoc_STR("Return symbol and scope dictionaries"
" used internally by compiler.")},
{NULL, NULL} /* sentinel */
};
/* Module initialisation: ready the symbol-table entry type, create the
   `_symtable` module and export the compiler's symbol flag values as
   integer constants. */
PyMODINIT_FUNC
init_symtable(void)
{
    /* Name/value pairs, added to the module in this order. */
    static const struct {
        const char *name;
        long value;
    } constants[] = {
        {"USE", USE},
        {"DEF_GLOBAL", DEF_GLOBAL},
        {"DEF_LOCAL", DEF_LOCAL},
        {"DEF_PARAM", DEF_PARAM},
        {"DEF_FREE", DEF_FREE},
        {"DEF_FREE_CLASS", DEF_FREE_CLASS},
        {"DEF_IMPORT", DEF_IMPORT},
        {"DEF_BOUND", DEF_BOUND},
        {"TYPE_FUNCTION", FunctionBlock},
        {"TYPE_CLASS", ClassBlock},
        {"TYPE_MODULE", ModuleBlock},
        {"OPT_IMPORT_STAR", OPT_IMPORT_STAR},
        {"OPT_EXEC", OPT_EXEC},
        {"OPT_BARE_EXEC", OPT_BARE_EXEC},
        {"LOCAL", LOCAL},
        {"GLOBAL_EXPLICIT", GLOBAL_EXPLICIT},
        {"GLOBAL_IMPLICIT", GLOBAL_IMPLICIT},
        {"FREE", FREE},
        {"CELL", CELL},
        {"SCOPE_OFF", SCOPE_OFF},
        {"SCOPE_MASK", SCOPE_MASK},
    };
    PyObject *module;
    unsigned int k;

    if (PyType_Ready(&PySTEntry_Type) < 0)
        return;
    module = Py_InitModule("_symtable", symtable_methods);
    if (module == NULL)
        return;

    for (k = 0; k < sizeof(constants) / sizeof(constants[0]); ++k)
        PyModule_AddIntConstant(module, constants[k].name,
                                constants[k].value);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,297 @@
#include "Python.h"
#include "structmember.h"
PyDoc_STRVAR(xxsubtype__doc__,
"xxsubtype is an example module showing how to subtype builtin types from C.\n"
"test_descr.py in the standard test suite requires it in order to complete.\n"
"If you don't care about the examples, and don't intend to run the Python\n"
"test suite, you can recompile Python without Modules/xxsubtype.c.");
/* We link this module statically for convenience. If compiled as a shared
library instead, some compilers don't allow addresses of Python objects
defined in other libraries to be used in static initializers here. The
DEFERRED_ADDRESS macro is used to tag the slots where such addresses
appear; the module init function must fill in the tagged slots at runtime.
The argument is for documentation -- the macro ignores it.
*/
#define DEFERRED_ADDRESS(ADDR) 0
/* spamlist -- a list subtype */
/* Instance layout: the base PyListObject must come first so that a
   spamlistobject pointer is also a valid list pointer; `state` is the
   extra per-instance int exposed through getstate()/setstate()/.state. */
typedef struct {
PyListObject list;
int state;
} spamlistobject;
/* getstate() -> the instance's `state` as a Python int.
   Takes no arguments; anything else is rejected by the parser. */
static PyObject *
spamlist_getstate(spamlistobject *self, PyObject *args)
{
    return PyArg_ParseTuple(args, ":getstate")
        ? PyInt_FromLong(self->state)
        : NULL;
}
/* setstate(state): store an int in the instance and return None. */
static PyObject *
spamlist_setstate(spamlistobject *self, PyObject *args)
{
    int new_state;

    if (!PyArg_ParseTuple(args, "i:setstate", &new_state))
        return NULL;

    self->state = new_state;
    Py_RETURN_NONE;
}
/* Shared implementation of classmeth() and staticmeth(): returns the
   tuple (self, args, kw) so tests can see exactly what was passed.
   A NULL self or kw is reported as None. */
static PyObject *
spamlist_specialmeth(PyObject *self, PyObject *args, PyObject *kw)
{
    PyObject *triple = PyTuple_New(3);

    if (triple == NULL)
        return NULL;

    self = (self != NULL) ? self : Py_None;
    kw = (kw != NULL) ? kw : Py_None;

    /* PyTuple_SET_ITEM steals a reference, so take one for each slot. */
    Py_INCREF(self);
    Py_INCREF(args);
    Py_INCREF(kw);
    PyTuple_SET_ITEM(triple, 0, self);
    PyTuple_SET_ITEM(triple, 1, args);
    PyTuple_SET_ITEM(triple, 2, kw);
    return triple;
}
/* Method table of spamlist. classmeth and staticmeth share one C
   function and differ only in their METH_CLASS / METH_STATIC flag. */
static PyMethodDef spamlist_methods[] = {
{"getstate", (PyCFunction)spamlist_getstate, METH_VARARGS,
PyDoc_STR("getstate() -> state")},
{"setstate", (PyCFunction)spamlist_setstate, METH_VARARGS,
PyDoc_STR("setstate(state)")},
/* These entries differ only in the flags; they are used by the tests
in test.test_descr. */
{"classmeth", (PyCFunction)spamlist_specialmeth,
METH_VARARGS | METH_KEYWORDS | METH_CLASS,
PyDoc_STR("classmeth(*args, **kw)")},
{"staticmeth", (PyCFunction)spamlist_specialmeth,
METH_VARARGS | METH_KEYWORDS | METH_STATIC,
PyDoc_STR("staticmeth(*args, **kw)")},
{NULL, NULL},
};
/* tp_init: run list.__init__ first, then reset the extra `state` field.
   Returns 0 on success, -1 if the base initialiser failed (in which case
   `state` is left untouched). */
static int
spamlist_init(spamlistobject *self, PyObject *args, PyObject *kwds)
{
    int rc = PyList_Type.tp_init((PyObject *)self, args, kwds);

    if (rc >= 0) {
        self->state = 0;
        return 0;
    }
    return -1;
}
/* Getter behind the read-only `state` attribute of spamlist. */
static PyObject *
spamlist_state_get(spamlistobject *self)
{
    long current = self->state;

    return PyInt_FromLong(current);
}
/* Attribute table of spamlist: `state` has a getter and no setter. */
static PyGetSetDef spamlist_getsets[] = {
{"state", (getter)spamlist_state_get, NULL,
PyDoc_STR("an int variable for demonstration purposes")},
{0}
};
/* Type object for spamlist. The ob_type and tp_base slots are written as
   DEFERRED_ADDRESS(...) (i.e. 0) here; initxxsubtype() sets tp_base to
   &PyList_Type before calling PyType_Ready(). Slots left 0 are inherited
   or defaulted. */
static PyTypeObject spamlist_type = {
PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
"xxsubtype.spamlist",
sizeof(spamlistobject),
0,
0, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
0, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
spamlist_methods, /* tp_methods */
0, /* tp_members */
spamlist_getsets, /* tp_getset */
DEFERRED_ADDRESS(&PyList_Type), /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)spamlist_init, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
};
/* spamdict -- a dict subtype */
/* Instance layout: the base PyDictObject must come first so that a
   spamdictobject pointer is also a valid dict pointer; `state` is the
   extra per-instance int exposed through getstate()/setstate()/.state. */
typedef struct {
PyDictObject dict;
int state;
} spamdictobject;
/* getstate() -> the instance's `state` as a Python int.
   Takes no arguments; anything else is rejected by the parser. */
static PyObject *
spamdict_getstate(spamdictobject *self, PyObject *args)
{
    return PyArg_ParseTuple(args, ":getstate")
        ? PyInt_FromLong(self->state)
        : NULL;
}
/* setstate(state): store an int in the instance and return None. */
static PyObject *
spamdict_setstate(spamdictobject *self, PyObject *args)
{
    int new_state;

    if (!PyArg_ParseTuple(args, "i:setstate", &new_state))
        return NULL;

    self->state = new_state;
    Py_RETURN_NONE;
}
/* Method table of spamdict: just the state accessor pair. */
static PyMethodDef spamdict_methods[] = {
{"getstate", (PyCFunction)spamdict_getstate, METH_VARARGS,
PyDoc_STR("getstate() -> state")},
{"setstate", (PyCFunction)spamdict_setstate, METH_VARARGS,
PyDoc_STR("setstate(state)")},
{NULL, NULL},
};
/* tp_init: run dict.__init__ first, then reset the extra `state` field.
   Returns 0 on success, -1 if the base initialiser failed (in which case
   `state` is left untouched). */
static int
spamdict_init(spamdictobject *self, PyObject *args, PyObject *kwds)
{
    int rc = PyDict_Type.tp_init((PyObject *)self, args, kwds);

    if (rc >= 0) {
        self->state = 0;
        return 0;
    }
    return -1;
}
/* Member table of spamdict: exposes `state` read-only, directly from the
   struct field (contrast with spamlist, which uses a getter function). */
static PyMemberDef spamdict_members[] = {
{"state", T_INT, offsetof(spamdictobject, state), READONLY,
PyDoc_STR("an int variable for demonstration purposes")},
{0}
};
/* Type object for spamdict. The ob_type and tp_base slots are written as
   DEFERRED_ADDRESS(...) (i.e. 0) here; initxxsubtype() sets tp_base to
   &PyDict_Type before calling PyType_Ready(). Slots left 0 are inherited
   or defaulted. */
static PyTypeObject spamdict_type = {
PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
"xxsubtype.spamdict",
sizeof(spamdictobject),
0,
0, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
0, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
spamdict_methods, /* tp_methods */
spamdict_members, /* tp_members */
0, /* tp_getset */
DEFERRED_ADDRESS(&PyDict_Type), /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)spamdict_init, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
};
/* bench(obj, name[, n]) -> float
   Looks up attribute `name` (a str, enforced by the "S" format) on `obj`
   n times (default 1000) and returns the processor time consumed, in
   seconds, as measured by clock().  A non-positive n performs no lookups.
   Returns NULL with an exception set if argument parsing or any attribute
   lookup fails. */
static PyObject *
spam_bench(PyObject *self, PyObject *args)
{
    PyObject *obj, *name, *res;
    int n = 1000;
    /* clock() returns clock_t, not time_t.  Use the matching type so the
       tick difference is not distorted by a conversion to a type that may
       have a different width or representation. */
    clock_t t0, t1;

    if (!PyArg_ParseTuple(args, "OS|i", &obj, &name, &n))
        return NULL;

    t0 = clock();
    while (--n >= 0) {
        res = PyObject_GetAttr(obj, name);
        if (res == NULL)
            return NULL;
        /* only the lookup cost matters; drop the result immediately */
        Py_DECREF(res);
    }
    t1 = clock();

    return PyFloat_FromDouble((double)(t1-t0) / CLOCKS_PER_SEC);
}
/* Module-level function table passed to Py_InitModule3(): exposes
   spam_bench() to Python under the name "bench". */
static PyMethodDef xxsubtype_functions[] = {
{"bench", spam_bench, METH_VARARGS},
{NULL, NULL} /* sentinel */
};
/* Module initializer for "xxsubtype".
   Patches the deferred tp_base pointers, readies both static types,
   creates the module, and publishes the types as xxsubtype.spamlist and
   xxsubtype.spamdict.  On any failure it simply returns, leaving the
   Python exception set by the failing call. */
PyMODINIT_FUNC
initxxsubtype(void)
{
    PyObject *m;

    /* Fill in deferred data addresses.  This must be done before
       PyType_Ready() is called.  Note that PyType_Ready() automatically
       initializes the ob.ob_type field to &PyType_Type if it's NULL,
       so it's not necessary to fill in ob_type first. */
    spamdict_type.tp_base = &PyDict_Type;
    if (PyType_Ready(&spamdict_type) < 0)
        return;

    spamlist_type.tp_base = &PyList_Type;
    if (PyType_Ready(&spamlist_type) < 0)
        return;

    m = Py_InitModule3("xxsubtype",
                       xxsubtype_functions,
                       xxsubtype__doc__);
    if (m == NULL)
        return;

    /* Both types were already readied above, so they are not passed to
       PyType_Ready() a second time here. */

    /* PyModule_AddObject() takes over one reference on success, so give it
       one of its own for each statically allocated type. */
    Py_INCREF(&spamlist_type);
    if (PyModule_AddObject(m, "spamlist",
                           (PyObject *) &spamlist_type) < 0)
        return;

    Py_INCREF(&spamdict_type);
    if (PyModule_AddObject(m, "spamdict",
                           (PyObject *) &spamdict_type) < 0)
        return;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,368 @@
Frequently Asked Questions about zlib
If your question is not there, please check the zlib home page
http://zlib.net/ which may have more recent information.
The latest zlib FAQ is at http://zlib.net/zlib_faq.html
1. Is zlib Y2K-compliant?
Yes. zlib doesn't handle dates.
2. Where can I get a Windows DLL version?
The zlib sources can be compiled without change to produce a DLL. See the
file win32/DLL_FAQ.txt in the zlib distribution. Pointers to the
precompiled DLL are found in the zlib web site at http://zlib.net/ .
3. Where can I get a Visual Basic interface to zlib?
See
* http://marknelson.us/1997/01/01/zlib-engine/
* win32/DLL_FAQ.txt in the zlib distribution
4. compress() returns Z_BUF_ERROR.
Make sure that before the call of compress(), the length of the compressed
buffer is equal to the available size of the compressed buffer and not
zero. For Visual Basic, check that this parameter is passed by reference
("as any"), not by value ("as long").
5. deflate() or inflate() returns Z_BUF_ERROR.
Before making the call, make sure that avail_in and avail_out are not zero.
When setting the parameter flush equal to Z_FINISH, also make sure that
avail_out is big enough to allow processing all pending input. Note that a
Z_BUF_ERROR is not fatal--another call to deflate() or inflate() can be
made with more input or output space. A Z_BUF_ERROR may in fact be
unavoidable depending on how the functions are used, since it is not
possible to tell whether or not there is more output pending when
strm.avail_out returns with zero. See http://zlib.net/zlib_how.html for a
heavily annotated example.
6. Where's the zlib documentation (man pages, etc.)?
It's in zlib.h . Examples of zlib usage are in the files test/example.c
and test/minigzip.c, with more in examples/ .
7. Why don't you use GNU autoconf or libtool or ...?
Because we would like to keep zlib as a very small and simple package.
zlib is rather portable and doesn't need much configuration.
8. I found a bug in zlib.
Most of the time, such problems are due to an incorrect usage of zlib.
Please try to reproduce the problem with a small program and send the
corresponding source to us at zlib@gzip.org . Do not send multi-megabyte
data files without prior agreement.
9. Why do I get "undefined reference to gzputc"?
If "make test" produces something like
example.o(.text+0x154): undefined reference to `gzputc'
check that you don't have old files libz.* in /usr/lib, /usr/local/lib or
/usr/X11R6/lib. Remove any old versions, then do "make install".
10. I need a Delphi interface to zlib.
See the contrib/delphi directory in the zlib distribution.
11. Can zlib handle .zip archives?
Not by itself, no. See the directory contrib/minizip in the zlib
distribution.
12. Can zlib handle .Z files?
No, sorry. You have to spawn an uncompress or gunzip subprocess, or adapt
the code of uncompress on your own.
13. How can I make a Unix shared library?
By default a shared (and a static) library is built for Unix. So:
make distclean
./configure
make
14. How do I install a shared zlib library on Unix?
After the above, then:
make install
However, many flavors of Unix come with a shared zlib already installed.
Before going to the trouble of compiling a shared version of zlib and
trying to install it, you may want to check if it's already there! If you
can #include <zlib.h>, it's there. The -lz option will probably link to
it. You can check the version at the top of zlib.h or with the
ZLIB_VERSION symbol defined in zlib.h .
15. I have a question about OttoPDF.
We are not the authors of OttoPDF. The real author is on the OttoPDF web
site: Joel Hainley, jhainley@myndkryme.com.
16. Can zlib decode Flate data in an Adobe PDF file?
Yes. See http://www.pdflib.com/ . To modify PDF forms, see
http://sourceforge.net/projects/acroformtool/ .
17. Why am I getting this "register_frame_info not found" error on Solaris?
After installing zlib 1.1.4 on Solaris 2.6, running applications using zlib
generates an error such as:
ld.so.1: rpm: fatal: relocation error: file /usr/local/lib/libz.so:
symbol __register_frame_info: referenced symbol not found
The symbol __register_frame_info is not part of zlib, it is generated by
the C compiler (cc or gcc). You must recompile applications using zlib
which have this problem. This problem is specific to Solaris. See
http://www.sunfreeware.com for Solaris versions of zlib and applications
using zlib.
18. Why does gzip give an error on a file I make with compress/deflate?
The compress and deflate functions produce data in the zlib format, which
is different and incompatible with the gzip format. The gz* functions in
zlib on the other hand use the gzip format. Both the zlib and gzip formats
use the same compressed data format internally, but have different headers
and trailers around the compressed data.
19. Ok, so why are there two different formats?
The gzip format was designed to retain the directory information about a
single file, such as the name and last modification date. The zlib format
on the other hand was designed for in-memory and communication channel
applications, and has a much more compact header and trailer and uses a
faster integrity check than gzip.
20. Well that's nice, but how do I make a gzip file in memory?
You can request that deflate write the gzip format instead of the zlib
format using deflateInit2(). You can also request that inflate decode the
gzip format using inflateInit2(). Read zlib.h for more details.
21. Is zlib thread-safe?
Yes. However any library routines that zlib uses and any application-
provided memory allocation routines must also be thread-safe. zlib's gz*
functions use stdio library routines, and most of zlib's functions use the
library memory allocation routines by default. zlib's *Init* functions
allow for the application to provide custom memory allocation routines.
Of course, you should only operate on any given zlib or gzip stream from a
single thread at a time.
22. Can I use zlib in my commercial application?
Yes. Please read the license in zlib.h.
23. Is zlib under the GNU license?
No. Please read the license in zlib.h.
24. The license says that altered source versions must be "plainly marked". So
what exactly do I need to do to meet that requirement?
You need to change the ZLIB_VERSION and ZLIB_VERNUM #defines in zlib.h. In
particular, the final version number needs to be changed to "f", and an
identification string should be appended to ZLIB_VERSION. Version numbers
x.x.x.f are reserved for modifications to zlib by others than the zlib
maintainers. For example, if the version of the base zlib you are altering
is "1.2.3.4", then in zlib.h you should change ZLIB_VERNUM to 0x123f, and
ZLIB_VERSION to something like "1.2.3.f-zachary-mods-v3". You can also
update the version strings in deflate.c and inftrees.c.
For altered source distributions, you should also note the origin and
nature of the changes in zlib.h, as well as in ChangeLog and README, along
with the dates of the alterations. The origin should include at least your
name (or your company's name), and an email address to contact for help or
issues with the library.
Note that distributing a compiled zlib library along with zlib.h and
zconf.h is also a source distribution, and so you should change
ZLIB_VERSION and ZLIB_VERNUM and note the origin and nature of the changes
in zlib.h as you would for a full source distribution.
25. Will zlib work on a big-endian or little-endian architecture, and can I
exchange compressed data between them?
Yes and yes.
26. Will zlib work on a 64-bit machine?
Yes. It has been tested on 64-bit machines, and has no dependence on any
data types being limited to 32-bits in length. If you have any
difficulties, please provide a complete problem report to zlib@gzip.org .
27. Will zlib decompress data from the PKWare Data Compression Library?
No. The PKWare DCL uses a completely different compressed data format than
does PKZIP and zlib. However, you can look in zlib's contrib/blast
directory for a possible solution to your problem.
28. Can I access data randomly in a compressed stream?
No, not without some preparation. If when compressing you periodically use
Z_FULL_FLUSH, carefully write all the pending data at those points, and
keep an index of those locations, then you can start decompression at those
points. You have to be careful to not use Z_FULL_FLUSH too often, since it
can significantly degrade compression. Alternatively, you can scan a
deflate stream once to generate an index, and then use that index for
random access. See examples/zran.c .
29. Does zlib work on MVS, OS/390, CICS, etc.?
It has in the past, but we have not heard of any recent evidence. There
were working ports of zlib 1.1.4 to MVS, but those links no longer work.
If you know of recent, successful applications of zlib on these operating
systems, please let us know. Thanks.
30. Is there some simpler, easier to read version of inflate I can look at to
understand the deflate format?
First off, you should read RFC 1951. Second, yes. Look in zlib's
contrib/puff directory.
31. Does zlib infringe on any patents?
As far as we know, no. In fact, that was originally the whole point behind
zlib. Look here for some more information:
http://www.gzip.org/#faq11
32. Can zlib work with greater than 4 GB of data?
Yes. inflate() and deflate() will process any amount of data correctly.
Each call of inflate() or deflate() is limited to input and output chunks
of the maximum value that can be stored in the compiler's "unsigned int"
type, but there is no limit to the number of chunks. Note however that the
strm.total_in and strm.total_out counters may be limited to 4 GB. These
counters are provided as a convenience and are not used internally by
inflate() or deflate(). The application can easily set up its own counters
updated after each call of inflate() or deflate() to count beyond 4 GB.
compress() and uncompress() may be limited to 4 GB, since they operate in a
single call. gzseek() and gztell() may be limited to 4 GB depending on how
zlib is compiled. See the zlibCompileFlags() function in zlib.h.
The word "may" appears several times above since there is a 4 GB limit only
if the compiler's "long" type is 32 bits. If the compiler's "long" type is
64 bits, then the limit is 16 exabytes.
33. Does zlib have any security vulnerabilities?
The only one that we are aware of is potentially in gzprintf(). If zlib is
compiled to use sprintf() or vsprintf(), then there is no protection
against a buffer overflow of an 8K string space (or other value as set by
gzbuffer()), other than the caller of gzprintf() assuring that the output
will not exceed 8K. On the other hand, if zlib is compiled to use
snprintf() or vsnprintf(), which should normally be the case, then there is
no vulnerability. The ./configure script will display warnings if an
insecure variation of sprintf() will be used by gzprintf(). Also the
zlibCompileFlags() function will return information on what variant of
sprintf() is used by gzprintf().
If you don't have snprintf() or vsnprintf() and would like one, you can
find a portable implementation here:
http://www.ijs.si/software/snprintf/
Note that you should be using the most recent version of zlib. Versions
1.1.3 and before were subject to a double-free vulnerability, and versions
1.2.1 and 1.2.2 were subject to an access exception when decompressing
invalid compressed data.
34. Is there a Java version of zlib?
Probably what you want is to use zlib in Java. zlib is already included
as part of the Java SDK in the java.util.zip package. If you really want
a version of zlib written in the Java language, look on the zlib home
page for links: http://zlib.net/ .
35. I get this or that compiler or source-code scanner warning when I crank it
up to maximally-pedantic. Can't you guys write proper code?
Many years ago, we gave up attempting to avoid warnings on every compiler
in the universe. It just got to be a waste of time, and some compilers
were downright silly as well as contradicted each other. So now, we simply
make sure that the code always works.
36. Valgrind (or some similar memory access checker) says that deflate is
performing a conditional jump that depends on an uninitialized value.
Isn't that a bug?
No. That is intentional for performance reasons, and the output of deflate
is not affected. This only started showing up recently since zlib 1.2.x
uses malloc() by default for allocations, whereas earlier versions used
calloc(), which zeros out the allocated memory. Even though the code was
correct, versions 1.2.4 and later were changed to not stimulate these
checkers.
37. Will zlib read the (insert any ancient or arcane format here) compressed
data format?
Probably not. Look in the comp.compression FAQ for pointers to various
formats and associated software.
38. How can I encrypt/decrypt zip files with zlib?
zlib doesn't support encryption. The original PKZIP encryption is very
weak and can be broken with freely available programs. To get strong
encryption, use GnuPG, http://www.gnupg.org/ , which already includes zlib
compression. For PKZIP compatible "encryption", look at
http://www.info-zip.org/
39. What's the difference between the "gzip" and "deflate" HTTP 1.1 encodings?
"gzip" is the gzip format, and "deflate" is the zlib format. They should
probably have called the second one "zlib" instead to avoid confusion with
the raw deflate compressed data format. While the HTTP 1.1 RFC 2616
correctly points to the zlib specification in RFC 1950 for the "deflate"
transfer encoding, there have been reports of servers and browsers that
incorrectly produce or expect raw deflate data per the deflate
specification in RFC 1951, most notably Microsoft. So even though the
"deflate" transfer encoding using the zlib format would be the more
efficient approach (and in fact exactly what the zlib format was designed
for), using the "gzip" transfer encoding is probably more reliable due to
an unfortunate choice of name on the part of the HTTP 1.1 authors.
Bottom line: use the gzip format for HTTP 1.1 encoding.
40. Does zlib support the new "Deflate64" format introduced by PKWare?
No. PKWare has apparently decided to keep that format proprietary, since
they have not documented it as they have previous compression formats. In
any case, the compression improvements are so modest compared to other more
modern approaches, that it's not worth the effort to implement.
41. I'm having a problem with the zip functions in zlib, can you help?
There are no zip functions in zlib. You are probably using minizip by
Gilles Vollant, which is found in the contrib directory of zlib. It is not
part of zlib. In fact none of the stuff in contrib is part of zlib. The
files in there are not supported by the zlib authors. You need to contact
the authors of the respective contribution for help.
42. The match.asm code in contrib is under the GNU General Public License.
Since it's part of zlib, doesn't that mean that all of zlib falls under the
GNU GPL?
No. The files in contrib are not part of zlib. They were contributed by
other authors and are provided as a convenience to the user within the zlib
distribution. Each item in contrib has its own license.
43. Is zlib subject to export controls? What is its ECCN?
zlib is not subject to export controls, and so is classified as EAR99.
44. Can you please sign these lengthy legal documents and fax them back to us
so that we can use your software in our product?
No. Go away. Shoo.

View File

@ -0,0 +1,68 @@
CMakeLists.txt cmake build file
ChangeLog history of changes
FAQ Frequently Asked Questions about zlib
INDEX this file
Makefile dummy Makefile that tells you to ./configure
Makefile.in template for Unix Makefile
README guess what
configure configure script for Unix
make_vms.com makefile for VMS
test/example.c zlib usage examples for build testing
test/minigzip.c minimal gzip-like functionality for build testing
test/infcover.c inf*.c code coverage for build coverage testing
treebuild.xml XML description of source file dependencies
zconf.h.cmakein zconf.h template for cmake
zconf.h.in zconf.h template for configure
zlib.3 Man page for zlib
zlib.3.pdf Man page in PDF format
zlib.map Linux symbol information
zlib.pc.in Template for pkg-config descriptor
zlib.pc.cmakein zlib.pc template for cmake
zlib2ansi perl script to convert source files for C++ compilation
amiga/ makefiles for Amiga SAS C
as400/ makefiles for AS/400
doc/ documentation for formats and algorithms
msdos/ makefiles for MSDOS
nintendods/ makefile for Nintendo DS
old/ makefiles for various architectures and zlib documentation
files that have not yet been updated for zlib 1.2.x
qnx/ makefiles for QNX
watcom/ makefiles for OpenWatcom
win32/ makefiles for Windows
zlib public header files (required for library use):
zconf.h
zlib.h
private source files used to build the zlib library:
adler32.c
compress.c
crc32.c
crc32.h
deflate.c
deflate.h
gzclose.c
gzguts.h
gzlib.c
gzread.c
gzwrite.c
infback.c
inffast.c
inffast.h
inffixed.h
inflate.c
inflate.h
inftrees.c
inftrees.h
trees.c
trees.h
uncompr.c
zutil.c
zutil.h
source files for sample programs
See examples/README.examples
unsupported contributions by third parties
See contrib/README.contrib

View File

@ -0,0 +1,115 @@
ZLIB DATA COMPRESSION LIBRARY
zlib 1.2.8 is a general purpose data compression library. All the code is
thread safe. The data format used by the zlib library is described by RFCs
(Request for Comments) 1950 to 1952 in the files
http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
rfc1952 (gzip format).
All functions of the compression library are documented in the file zlib.h
(volunteers to write man pages are welcome; contact zlib@gzip.org). A usage example
of the library is given in the file test/example.c which also tests that
the library is working correctly. Another example is given in the file
test/minigzip.c. The compression library itself is composed of all source
files in the root directory.
To compile all files and run the test program, follow the instructions given at
the top of Makefile.in. In short "./configure; make test", and if that goes
well, "make install" should work for most flavors of Unix. For Windows, use
one of the special makefiles in win32/ or contrib/vstudio/ . For VMS, use
make_vms.com.
Questions about zlib should be sent to <zlib@gzip.org>, or to Gilles Vollant
<info@winimage.com> for the Windows DLL version. The zlib home page is
http://zlib.net/ . Before reporting a problem, please check this site to
verify that you have the latest version of zlib; otherwise get the latest
version and check whether the problem still exists or not.
PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help.
Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan. 1997
issue of Dr. Dobb's Journal; a copy of the article is available at
http://marknelson.us/1997/01/01/zlib-engine/ .
The changes made in version 1.2.8 are documented in the file ChangeLog.
Unsupported third party contributions are provided in directory contrib/ .
zlib is available in Java using the java.util.zip package, documented at
http://java.sun.com/developer/technicalArticles/Programming/compression/ .
A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is available
at CPAN (Comprehensive Perl Archive Network) sites, including
http://search.cpan.org/~pmqs/IO-Compress-Zlib/ .
A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
available in Python 1.5 and later versions, see
http://docs.python.org/library/zlib.html .
zlib is built into tcl: http://wiki.tcl.tk/4610 .
An experimental package to read and write files in .zip format, written on top
of zlib by Gilles Vollant <info@winimage.com>, is available in the
contrib/minizip directory of zlib.
Notes for some targets:
- For Windows DLL versions, please see win32/DLL_FAQ.txt
- For 64-bit Irix, deflate.c must be compiled without any optimization. With
-O, one libpng test fails. The test works in 32 bit mode (with the -n32
compiler flag). The compiler bug has been reported to SGI.
- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1; it
works when compiled with cc.
- On Digital Unix 4.0D (formerly OSF/1) on AlphaServer, the cc option -std1 is
necessary to get gzprintf working correctly. This is done by configure.
- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
other compilers. Use "make test" to check your compiler.
- gzdopen is not supported on RISCOS or BEOS.
- For PalmOs, see http://palmzlib.sourceforge.net/
Acknowledgments:
The deflate format used by zlib was defined by Phil Katz. The deflate and
zlib specifications were written by L. Peter Deutsch. Thanks to all the
people who reported problems and suggested various improvements in zlib; they
are too numerous to cite here.
Copyright notice:
(C) 1995-2013 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu
If you use the zlib library in a product, we would appreciate *not* receiving
lengthy legal documents to sign. The sources are provided for free but without
warranty of any kind. The library has been entirely written by Jean-loup
Gailly and Mark Adler; it does not include third-party code.
If you redistribute modified sources, we would appreciate that you include in
the file ChangeLog history information documenting your changes. Please read
the FAQ for more information on the distribution of modified source versions.

View File

@ -0,0 +1,179 @@
/* adler32.c -- compute the Adler-32 checksum of a data stream
* Copyright (C) 1995-2011 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#include "zutil.h"
/* `local` marks functions that are private to this file. */
#define local static
/* Forward declaration of the shared worker that both adler32_combine() and
   adler32_combine64() call. */
local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
#define BASE 65521 /* largest prime smaller than 65536 */
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
/* Unrolled accumulation steps.  DO1 adds byte (buf)[i] into `adler` and then
   adds `adler` into `sum2`; each larger macro chains two of the next smaller
   one, so DO16 consumes buf[0..15].  These macros refer to variables named
   `adler` and `sum2` directly, so both must be in scope where they are used. */
#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;}
#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
#define DO16(buf) DO8(buf,0); DO8(buf,8);
/* use NO_DIVIDE if your processor does not do division in hardware --
try it both ways to see which is faster */
#ifdef NO_DIVIDE
/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
(thank you to John Reiser for pointing this out) */
/* CHOP folds the bits above the low 16 back in as 15 * (a >> 16), written
   as (tmp << 4) - tmp, giving a smaller value congruent to a modulo BASE. */
# define CHOP(a) \
do { \
unsigned long tmp = a >> 16; \
a &= 0xffffUL; \
a += (tmp << 4) - tmp; \
} while (0)
/* MOD28: one CHOP followed by a single conditional subtraction of BASE. */
# define MOD28(a) \
do { \
CHOP(a); \
if (a >= BASE) a -= BASE; \
} while (0)
/* MOD: two CHOP passes (the second inside MOD28) and then the conditional
   subtraction. */
# define MOD(a) \
do { \
CHOP(a); \
MOD28(a); \
} while (0)
/* MOD63: reduction for a non-negative z_off64_t.  The bits above the low 32
   are folded in as 225 * (a >> 32), written as (tmp << 8) - (tmp << 5) + tmp,
   where 225 == 15 * 15; two 16-bit folds and a conditional subtraction
   follow. */
# define MOD63(a) \
do { /* this assumes a is not negative */ \
z_off64_t tmp = a >> 32; \
a &= 0xffffffffL; \
a += (tmp << 8) - (tmp << 5) + tmp; \
tmp = a >> 16; \
a &= 0xffffL; \
a += (tmp << 4) - tmp; \
tmp = a >> 16; \
a &= 0xffffL; \
a += (tmp << 4) - tmp; \
if (a >= BASE) a -= BASE; \
} while (0)
#else
/* With hardware division available, all three reductions are a plain
   remainder by BASE. */
# define MOD(a) a %= BASE
# define MOD28(a) a %= BASE
# define MOD63(a) a %= BASE
#endif
/* ========================================================================= */
/* Update the running Adler-32 checksum `adler` with `len` bytes starting at
   `buf` and return the new checksum.  The low 16 bits of the value carry the
   byte sum and the high 16 bits carry the sum of the running byte sums.
   When `buf` is Z_NULL (and len != 1) the initial checksum value 1 is
   returned.  Note that the len == 1 fast path reads buf[0] before the
   Z_NULL test. */
uLong ZEXPORT adler32(adler, buf, len)
    uLong adler;
    const Bytef *buf;
    uInt len;
{
    unsigned long sum2;     /* must keep this name: the DO* macros use it */
    unsigned blocks;        /* 16-byte groups left in the current NMAX run */

    /* split Adler-32 into component sums */
    sum2 = (adler >> 16) & 0xffff;
    adler &= 0xffff;

    /* in case user likes doing a byte at a time, keep it fast */
    if (len == 1) {
        adler += buf[0];
        if (adler >= BASE)
            adler -= BASE;
        sum2 += adler;
        if (sum2 >= BASE)
            sum2 -= BASE;
        return adler | (sum2 << 16);
    }

    /* initial Adler-32 value (deferred check for len == 1 speed) */
    if (buf == Z_NULL)
        return 1L;

    /* in case short lengths are provided, keep it somewhat fast */
    if (len < 16) {
        for (; len != 0; len--) {
            adler += *buf++;
            sum2 += adler;
        }
        if (adler >= BASE)
            adler -= BASE;
        MOD28(sum2);            /* only added so many BASE's */
        return adler | (sum2 << 16);
    }

    /* full NMAX-byte runs: sums cannot overflow within one run, so a single
       reduction per run is enough (NMAX is divisible by 16) */
    for (; len >= NMAX; len -= NMAX) {
        for (blocks = NMAX / 16; blocks != 0; blocks--) {
            DO16(buf);          /* 16 sums unrolled */
            buf += 16;
        }
        MOD(adler);
        MOD(sum2);
    }

    /* leftover bytes (fewer than NMAX): again one reduction at the end,
       skipped entirely when nothing remains */
    if (len) {
        for (; len >= 16; len -= 16) {
            DO16(buf);
            buf += 16;
        }
        for (; len != 0; len--) {
            adler += *buf++;
            sum2 += adler;
        }
        MOD(adler);
        MOD(sum2);
    }

    /* return recombined sums */
    return adler | (sum2 << 16);
}
/* ========================================================================= */
/* Combine two Adler-32 checksums: given adler1 for a first sequence and
   adler2 for a second sequence of length len2, return the checksum of the
   two sequences concatenated.  A negative len2 yields 0xffffffff, which is
   not a valid Adler-32 value, as a debugging aid. */
local uLong adler32_combine_(adler1, adler2, len2)
    uLong adler1;
    uLong adler2;
    z_off64_t len2;
{
    unsigned long low;      /* combined low half */
    unsigned long high;     /* combined high half */
    unsigned long high1;    /* high half of adler1 */
    unsigned long high2;    /* high half of adler2 */
    unsigned rem;           /* len2 reduced modulo BASE */

    /* for negative len, return invalid adler32 as a clue for debugging */
    if (len2 < 0)
        return 0xffffffffUL;

    /* the derivation of this formula is left as an exercise for the reader */
    MOD63(len2);                /* assumes len2 >= 0 */
    rem = (unsigned)len2;

    high1 = (adler1 >> 16) & 0xffff;
    high2 = (adler2 >> 16) & 0xffff;

    low = adler1 & 0xffff;
    high = rem * low;
    MOD(high);
    low += (adler2 & 0xffff) + BASE - 1;
    high += high1 + high2 + BASE - rem;

    /* at most two subtractions are applied to each half, exactly as many
       as the sums above can require for in-range inputs */
    if (low >= BASE)
        low -= BASE;
    if (low >= BASE)
        low -= BASE;
    if (high >= (BASE << 1))
        high -= (BASE << 1);
    if (high >= BASE)
        high -= BASE;

    return low | (high << 16);
}
/* ========================================================================= */
/* Public entry point taking the second sequence's length as a z_off_t;
   all the work is done by adler32_combine_(). */
uLong ZEXPORT adler32_combine(adler1, adler2, len2)
    uLong adler1;
    uLong adler2;
    z_off_t len2;
{
    uLong combined;

    combined = adler32_combine_(adler1, adler2, len2);
    return combined;
}
/* Public entry point taking the second sequence's length as a z_off64_t;
   all the work is done by adler32_combine_(). */
uLong ZEXPORT adler32_combine64(adler1, adler2, len2)
    uLong adler1;
    uLong adler2;
    z_off64_t len2;
{
    uLong combined;

    combined = adler32_combine_(adler1, adler2, len2);
    return combined;
}

View File

@ -0,0 +1,209 @@
1. Compression algorithm (deflate)
The deflation algorithm used by gzip (also zip and zlib) is a variation of
LZ77 (Lempel-Ziv 1977, see reference below). It finds duplicated strings in
the input data. The second occurrence of a string is replaced by a
pointer to the previous string, in the form of a pair (distance,
length). Distances are limited to 32K bytes, and lengths are limited
to 258 bytes. When a string does not occur anywhere in the previous
32K bytes, it is emitted as a sequence of literal bytes. (In this
description, `string' must be taken as an arbitrary sequence of bytes,
and is not restricted to printable characters.)
Literals or match lengths are compressed with one Huffman tree, and
match distances are compressed with another tree. The trees are stored
in a compact form at the start of each block. The blocks can have any
size (except that the compressed data for one block must fit in
available memory). A block is terminated when deflate() determines that
it would be useful to start another block with fresh trees. (This is
somewhat similar to the behavior of LZW-based _compress_.)
Duplicated strings are found using a hash table. All input strings of
length 3 are inserted in the hash table. A hash index is computed for
the next 3 bytes. If the hash chain for this index is not empty, all
strings in the chain are compared with the current input string, and
the longest match is selected.
The hash chains are searched starting with the most recent strings, to
favor small distances and thus take advantage of the Huffman encoding.
The hash chains are singly linked. There are no deletions from the
hash chains, the algorithm simply discards matches that are too old.
To avoid a worst-case situation, very long hash chains are arbitrarily
truncated at a certain length, determined by a runtime option (level
parameter of deflateInit). So deflate() does not always find the longest
possible match but generally finds a match which is long enough.
deflate() also defers the selection of matches with a lazy evaluation
mechanism. After a match of length N has been found, deflate() searches for
a longer match at the next input byte. If a longer match is found, the
previous match is truncated to a length of one (thus producing a single
literal byte) and the process of lazy evaluation begins again. Otherwise,
the original match is kept, and the next match search is attempted only N
steps later.
The lazy match evaluation is also subject to a runtime parameter. If
the current match is long enough, deflate() reduces the search for a longer
match, thus speeding up the whole process. If compression ratio is more
important than speed, deflate() attempts a complete second search even if
the first match is already long enough.
The lazy match evaluation is not performed for the fastest compression
modes (level parameter 1 to 3). For these fast modes, new strings
are inserted in the hash table only when no match was found, or
when the match is not too long. This degrades the compression ratio
but saves time since there are both fewer insertions and fewer searches.
2. Decompression algorithm (inflate)
2.1 Introduction
The key question is how to represent a Huffman code (or any prefix code) so
that you can decode fast. The most important characteristic is that shorter
codes are much more common than longer codes, so pay attention to decoding the
short codes fast, and let the long codes take longer to decode.
inflate() sets up a first level table that covers some number of bits of
input less than the length of the longest code. It gets that many bits from the
stream, and looks it up in the table. The table will tell if the next
code is that many bits or less and how many, and if it is, it will tell
the value, else it will point to the next level table for which inflate()
grabs more bits and tries to decode a longer code.
How many bits to make the first lookup is a tradeoff between the time it
takes to decode and the time it takes to build the table. If building the
table took no time (and if you had infinite memory), then there would only
be a first level table to cover all the way to the longest code. However,
building the table ends up taking a lot longer for more bits since short
codes are replicated many times in such a table. What inflate() does is
simply to make the number of bits in the first table a variable, and then
to set that variable for the maximum speed.
For inflate, which has 286 possible codes for the literal/length tree, the size
of the first table is nine bits. Also the distance trees have 30 possible
values, and the size of the first table is six bits. Note that for each of
those cases, the table ended up one bit longer than the ``average'' code
length, i.e. the code length of an approximately flat code which would be a
little more than eight bits for 286 symbols and a little less than five bits
for 30 symbols.
2.2 More details on the inflate table lookup
Ok, you want to know what this cleverly obfuscated inflate tree actually
looks like. You are correct that it's not a Huffman tree. It is simply a
lookup table for the first, let's say, nine bits of a Huffman symbol. The
symbol could be as short as one bit or as long as 15 bits. If a particular
symbol is shorter than nine bits, then that symbol's translation is duplicated
in all those entries that start with that symbol's bits. For example, if the
symbol is four bits, then it's duplicated 32 times in a nine-bit table. If a
symbol is nine bits long, it appears in the table once.
If the symbol is longer than nine bits, then that entry in the table points
to another similar table for the remaining bits. Again, there are duplicated
entries as needed. The idea is that most of the time the symbol will be short
and there will only be one table look up. (That's the whole idea behind data
compression in the first place.) For the less frequent long symbols, there
will be two lookups. If you had a compression method with really long
symbols, you could have as many levels of lookups as is efficient. For
inflate, two is enough.
So a table entry either points to another table (in which case nine bits in
the above example are gobbled), or it contains the translation for the symbol
and the number of bits to gobble. Then you start again with the next
ungobbled bit.
You may wonder: why not just have one lookup table for how ever many bits the
longest symbol is? The reason is that if you do that, you end up spending
more time filling in duplicate symbol entries than you do actually decoding.
At least for deflate's output that generates new trees every several 10's of
kbytes. You can imagine that filling in a 2^15 entry table for a 15-bit code
would take too long if you're only decoding several thousand symbols. At the
other extreme, you could make a new table for every bit in the code. In fact,
that's essentially a Huffman tree. But then you spend too much time
traversing the tree while decoding, even for short symbols.
So the number of bits for the first lookup table is a trade-off between the
time to fill out the table and the time spent looking at the second level and
above of the table.
Here is an example, scaled down:
The code being decoded, with 10 symbols, from 1 to 6 bits long:
A: 0
B: 10
C: 1100
D: 11010
E: 11011
F: 11100
G: 11101
H: 11110
I: 111110
J: 111111
Let's make the first table three bits long (eight entries):
000: A,1
001: A,1
010: A,1
011: A,1
100: B,2
101: B,2
110: -> table X (gobble 3 bits)
111: -> table Y (gobble 3 bits)
Each entry is what the bits decode as and how many bits that is, i.e. how
many bits to gobble. Or the entry points to another table, with the number of
bits to gobble implicit in the size of the table.
Table X is two bits long since the longest code starting with 110 is five bits
long:
00: C,1
01: C,1
10: D,2
11: E,2
Table Y is three bits long since the longest code starting with 111 is six
bits long:
000: F,2
001: F,2
010: G,2
011: G,2
100: H,2
101: H,2
110: I,3
111: J,3
So what we have here are three tables with a total of 20 entries that had to
be constructed. That's compared to 64 entries for a single table. Or
compared to 16 entries for a Huffman tree (six two entry tables and one four
entry table). Assuming that the code ideally represents the probability of
the symbols, it takes on the average 1.25 lookups per symbol. That's compared
to one lookup for the single table, or 1.66 lookups per symbol for the
Huffman tree.
There, I think that gives you a picture of what's going on. For inflate, the
meaning of a particular symbol is often more than just a letter. It can be a
byte (a "literal"), or it can be either a length or a distance which
indicates a base value and a number of bits to fetch after the code that is
added to the base value. Or it might be the special end-of-block code. The
data structures created in inftrees.c try to encode all that information
compactly in the tables.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu
References:
[LZ77] Ziv J., Lempel A., ``A Universal Algorithm for Sequential Data
Compression,'' IEEE Transactions on Information Theory, Vol. 23, No. 3,
pp. 337-343.
``DEFLATE Compressed Data Format Specification'' available in
http://tools.ietf.org/html/rfc1951

View File

@ -0,0 +1,80 @@
/* compress.c -- compress a memory buffer
* Copyright (C) 1995-2005 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#define ZLIB_INTERNAL
#include "zlib.h"
/* ===========================================================================
     Compresses the source buffer into the destination buffer. The level
   parameter has the same meaning as in deflateInit.  sourceLen is the byte
   length of the source buffer. Upon entry, destLen is the total size of the
   destination buffer; a buffer of at least compressBound(sourceLen) bytes is
   large enough for any input of that length. Upon successful exit, destLen
   is the actual size of the compressed buffer; on failure it is left
   unchanged.

     Both lengths are uLong values, while a z_stream counts available input
   and output in uInt units.  The buffers are therefore handed to deflate()
   in pieces of at most the largest uInt value, so that lengths which do not
   fit in a uInt are processed in full instead of being truncated.

     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
   Z_STREAM_ERROR if the level parameter is invalid.
*/
int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
    Bytef *dest;
    uLongf *destLen;
    const Bytef *source;
    uLong sourceLen;
    int level;
{
    z_stream stream;
    int err;
    const uInt max = (uInt)-1;  /* most that avail_in/avail_out can hold */
    uLong left;                 /* output space not yet given to deflate() */

    left = *destLen;

    stream.zalloc = (alloc_func)0;
    stream.zfree = (free_func)0;
    stream.opaque = (voidpf)0;

    err = deflateInit(&stream, level);
    if (err != Z_OK) return err;

    stream.next_out = dest;
    stream.avail_out = 0;
    stream.next_in = (z_const Bytef *)source;
    stream.avail_in = 0;

    do {
        /* top up the output window from whatever room remains in dest */
        if (stream.avail_out == 0) {
            stream.avail_out = left > (uLong)max ? max : (uInt)left;
            left -= stream.avail_out;
        }
        /* top up the input window from whatever remains of source */
        if (stream.avail_in == 0) {
            stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen;
            sourceLen -= stream.avail_in;
        }
        /* only ask deflate() to finish once the last piece of input has
           been handed over */
        err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH);
    } while (err == Z_OK);

    if (err != Z_STREAM_END) {
        /* running out of output room surfaces here as Z_BUF_ERROR from
           deflate(), since it is then called with avail_out == 0 */
        deflateEnd(&stream);
        return err;
    }
    *destLen = stream.total_out;

    err = deflateEnd(&stream);
    return err;
}
/* ===========================================================================
*/
int ZEXPORT compress (dest, destLen, source, sourceLen)
    Bytef *dest;
    uLongf *destLen;
    const Bytef *source;
    uLong sourceLen;
{
    /* Same as compress2(), with the level fixed to Z_DEFAULT_COMPRESSION. */
    int status = compress2(dest, destLen, source, sourceLen,
                           Z_DEFAULT_COMPRESSION);

    return status;
}
/* ===========================================================================
If the default memLevel or windowBits for deflateInit() is changed, then
this function needs to be updated.
*/
uLong ZEXPORT compressBound (sourceLen)
    uLong sourceLen;
{
    /* Bound = the input length itself, plus three shifted fractions of it
       (1/4096, 1/16384 and 1/2^25), plus a constant 13 bytes. */
    uLong bound = sourceLen;

    bound += sourceLen >> 12;
    bound += sourceLen >> 14;
    bound += sourceLen >> 25;
    bound += 13;
    return bound;
}

View File

@ -0,0 +1,831 @@
#!/bin/sh
# configure script for zlib.
#
# Normally configure builds both a static and a shared library.
# If you want to build just a static library, use: ./configure --static
#
# To impose specific compiler or flags or install directory, use for example:
# prefix=$HOME CC=cc CFLAGS="-O4" ./configure
# or for csh/tcsh users:
# (setenv prefix $HOME; setenv CC cc; setenv CFLAGS "-O4"; ./configure)
# Incorrect settings of CC or CFLAGS may prevent creating a shared library.
# If you have problems, try without defining CC and CFLAGS before reporting
# an error.
# start off configure.log
echo -------------------- >> configure.log
echo $0 $* >> configure.log
date >> configure.log
# set command prefix for cross-compilation
if [ -n "${CHOST}" ]; then
uname="`echo "${CHOST}" | sed -e 's/^[^-]*-\([^-]*\)$/\1/' -e 's/^[^-]*-[^-]*-\([^-]*\)$/\1/' -e 's/^[^-]*-[^-]*-\([^-]*\)-.*$/\1/'`"
CROSS_PREFIX="${CHOST}-"
fi
# destination name for static library
STATICLIB=libz.a
# extract zlib version numbers from zlib.h
VER=`sed -n -e '/VERSION "/s/.*"\(.*\)".*/\1/p' < zlib.h`
VER3=`sed -n -e '/VERSION "/s/.*"\([0-9]*\\.[0-9]*\\.[0-9]*\).*/\1/p' < zlib.h`
VER2=`sed -n -e '/VERSION "/s/.*"\([0-9]*\\.[0-9]*\)\\..*/\1/p' < zlib.h`
VER1=`sed -n -e '/VERSION "/s/.*"\([0-9]*\)\\..*/\1/p' < zlib.h`
# establish commands for library building
#
# Each tool is probed by running "<cross prefix><tool> --version". The probe
# counts as a success when the tool exits 0 or when its exit status is below
# 126: the shell reports 126 for "found but not executable" and 127 for
# "not found", so a smaller status means the tool did run even if it rejected
# the --version option. A value already present in the environment (AR,
# ARFLAGS, RANLIB, NM) always takes precedence over the detected one.
if "${CROSS_PREFIX}ar" --version >/dev/null 2>/dev/null || test $? -lt 126; then
AR=${AR-"${CROSS_PREFIX}ar"}
test -n "${CROSS_PREFIX}" && echo Using ${AR} | tee -a configure.log
else
# prefixed ar could not be run: fall back to the plain name
AR=${AR-"ar"}
test -n "${CROSS_PREFIX}" && echo Using ${AR} | tee -a configure.log
fi
ARFLAGS=${ARFLAGS-"rc"}
if "${CROSS_PREFIX}ranlib" --version >/dev/null 2>/dev/null || test $? -lt 126; then
RANLIB=${RANLIB-"${CROSS_PREFIX}ranlib"}
test -n "${CROSS_PREFIX}" && echo Using ${RANLIB} | tee -a configure.log
else
# prefixed ranlib could not be run: fall back to the plain name
RANLIB=${RANLIB-"ranlib"}
fi
if "${CROSS_PREFIX}nm" --version >/dev/null 2>/dev/null || test $? -lt 126; then
NM=${NM-"${CROSS_PREFIX}nm"}
test -n "${CROSS_PREFIX}" && echo Using ${NM} | tee -a configure.log
else
# prefixed nm could not be run: fall back to the plain name
NM=${NM-"nm"}
fi
# set defaults before processing command line options
LDCONFIG=${LDCONFIG-"ldconfig"}
LDSHAREDLIBC="${LDSHAREDLIBC--lc}"
ARCHS=
prefix=${prefix-/usr/local}
exec_prefix=${exec_prefix-'${prefix}'}
libdir=${libdir-'${exec_prefix}/lib'}
sharedlibdir=${sharedlibdir-'${libdir}'}
includedir=${includedir-'${prefix}/include'}
mandir=${mandir-'${prefix}/share/man'}
shared_ext='.so'
shared=1
solo=0
cover=0
zprefix=0
zconst=0
build64=0
gcc=0
old_cc="$CC"
old_cflags="$CFLAGS"
OBJC='$(OBJZ) $(OBJG)'
PIC_OBJC='$(PIC_OBJZ) $(PIC_OBJG)'
# leave this script, optionally in a bad way
leave()
{
  # $1 is the exit status to leave with. Anything other than "0" is reported
  # as an abort, both on the terminal and in configure.log.
  if test "$*" != "0"; then
    echo "** $0 aborting." | tee -a configure.log
  fi
  # Remove the scratch files only once their base name is known. leave can
  # be called from option parsing, before $test has been assigned; with an
  # empty $test the names below would collapse to ".c", ".o", "$shared_ext"
  # and ".gcno" and could delete unrelated files in the current directory.
  if test -n "$test"; then
    rm -f $test.[co] $test $test$shared_ext $test.gcno
  fi
  # NOTE(review): "./--version" is presumably left behind by a probed tool
  # that treated "--version" as a file name -- confirm.
  rm -f ./--version
  echo -------------------- >> configure.log
  echo >> configure.log
  echo >> configure.log
  exit $1
}
# process command line options
while test $# -ge 1
do
# Options that carry a value are accepted both as "--opt=VALUE" and as
# "--opt VALUE"; the "=" forms are listed first so that they win over the
# shorter prefix patterns. The value of an "=" form is everything after the
# FIRST "=" (sed 's/[^=]*=//'), so values that themselves contain "=" are
# kept intact, and "$1" is quoted so that the value is neither word-split
# nor glob-expanded before sed sees it.
case "$1" in
    -h* | --help)
      echo 'usage:' | tee -a configure.log
      echo '  configure [--const] [--zprefix] [--prefix=PREFIX]  [--eprefix=EXPREFIX]' | tee -a configure.log
      echo '    [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log
      echo '    [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log
        exit 0 ;;
    -p*=* | --prefix=*) prefix=`echo "$1" | sed 's/[^=]*=//'`; shift ;;
    -e*=* | --eprefix=*) exec_prefix=`echo "$1" | sed 's/[^=]*=//'`; shift ;;
    -l*=* | --libdir=*) libdir=`echo "$1" | sed 's/[^=]*=//'`; shift ;;
    --sharedlibdir=*) sharedlibdir=`echo "$1" | sed 's/[^=]*=//'`; shift ;;
    -i*=* | --includedir=*) includedir=`echo "$1" | sed 's/[^=]*=//'`;shift ;;
    -u*=* | --uname=*) uname=`echo "$1" | sed 's/[^=]*=//'`;shift ;;
    -p* | --prefix) prefix="$2"; shift; shift ;;
    -e* | --eprefix) exec_prefix="$2"; shift; shift ;;
    -l* | --libdir) libdir="$2"; shift; shift ;;
    -i* | --includedir) includedir="$2"; shift; shift ;;
    -s* | --shared | --enable-shared) shared=1; shift ;;
    -t | --static) shared=0; shift ;;
    --solo) solo=1; shift ;;
    --cover) cover=1; shift ;;
    -z* | --zprefix) zprefix=1; shift ;;
    -6* | --64) build64=1; shift ;;
    -a*=* | --archs=*) ARCHS=`echo "$1" | sed 's/[^=]*=//'`; shift ;;
    --sysconfdir=*) echo "ignored option: --sysconfdir" | tee -a configure.log; shift ;;
    --localstatedir=*) echo "ignored option: --localstatedir" | tee -a configure.log; shift ;;
    -c* | --const) zconst=1; shift ;;
    *)
      # anything unrecognized aborts the script with a non-zero status
      echo "unknown option: $1" | tee -a configure.log
      echo "$0 --help for help" | tee -a configure.log
      leave 1;;
    esac
done
# temporary file name
test=ztest$$
# put arguments in log, also put test file in log if used in arguments
show()
{
  # When the command line mentions the scratch source file, record that
  # file's current contents in the log ahead of the command itself.
  case "$*" in
    *$test.c*)
      {
        echo === $test.c ===
        cat $test.c
        echo ===
      } >> configure.log;;
  esac
  # Always record the command line being run.
  echo $* >> configure.log
}
# check for gcc vs. cc and set compile and link flags based on the system identified by uname
cat > $test.c <<EOF
extern int getchar();
int hello() {return getchar();}
EOF
test -z "$CC" && echo Checking for ${CROSS_PREFIX}gcc... | tee -a configure.log
cc=${CC-${CROSS_PREFIX}gcc}
cflags=${CFLAGS-"-O3"}
# to force the asm version use: CFLAGS="-O3 -DASMV" ./configure
case "$cc" in
*gcc*) gcc=1 ;;
*clang*) gcc=1 ;;
esac
case `$cc -v 2>&1` in
*gcc*) gcc=1 ;;
esac
show $cc -c $test.c
if test "$gcc" -eq 1 && ($cc -c $test.c) >> configure.log 2>&1; then
echo ... using gcc >> configure.log
CC="$cc"
CFLAGS="${CFLAGS--O3} ${ARCHS}"
SFLAGS="${CFLAGS--O3} -fPIC"
LDFLAGS="${LDFLAGS} ${ARCHS}"
if test $build64 -eq 1; then
CFLAGS="${CFLAGS} -m64"
SFLAGS="${SFLAGS} -m64"
fi
if test "${ZLIBGCCWARN}" = "YES"; then
if test "$zconst" -eq 1; then
CFLAGS="${CFLAGS} -Wall -Wextra -Wcast-qual -pedantic -DZLIB_CONST"
else
CFLAGS="${CFLAGS} -Wall -Wextra -pedantic"
fi
fi
if test -z "$uname"; then
uname=`(uname -s || echo unknown) 2>/dev/null`
fi
case "$uname" in
Linux* | linux* | GNU | GNU/* | solaris*)
LDSHARED=${LDSHARED-"$cc -shared -Wl,-soname,libz.so.1,--version-script,zlib.map"} ;;
*BSD | *bsd* | DragonFly)
LDSHARED=${LDSHARED-"$cc -shared -Wl,-soname,libz.so.1,--version-script,zlib.map"}
LDCONFIG="ldconfig -m" ;;
CYGWIN* | Cygwin* | cygwin* | OS/2*)
EXE='.exe' ;;
MINGW* | mingw*)
# temporary bypass
rm -f $test.[co] $test $test$shared_ext
echo "Please use win32/Makefile.gcc instead." | tee -a configure.log
leave 1
LDSHARED=${LDSHARED-"$cc -shared"}
LDSHAREDLIBC=""
EXE='.exe' ;;
QNX*) # This is for QNX6. I suppose that the QNX rule below is for QNX2,QNX4
# (alain.bonnefoy@icbt.com)
LDSHARED=${LDSHARED-"$cc -shared -Wl,-hlibz.so.1"} ;;
HP-UX*)
LDSHARED=${LDSHARED-"$cc -shared $SFLAGS"}
case `(uname -m || echo unknown) 2>/dev/null` in
ia64)
shared_ext='.so'
SHAREDLIB='libz.so' ;;
*)
shared_ext='.sl'
SHAREDLIB='libz.sl' ;;
esac ;;
Darwin* | darwin*)
shared_ext='.dylib'
SHAREDLIB=libz$shared_ext
SHAREDLIBV=libz.$VER$shared_ext
SHAREDLIBM=libz.$VER1$shared_ext
LDSHARED=${LDSHARED-"$cc -dynamiclib -install_name $libdir/$SHAREDLIBM -compatibility_version $VER1 -current_version $VER3"}
if libtool -V 2>&1 | grep Apple > /dev/null; then
AR="libtool"
else
AR="/usr/bin/libtool"
fi
ARFLAGS="-o" ;;
*) LDSHARED=${LDSHARED-"$cc -shared"} ;;
esac
else
# find system name and corresponding cc options
CC=${CC-cc}
gcc=0
echo ... using $CC >> configure.log
if test -z "$uname"; then
uname=`(uname -sr || echo unknown) 2>/dev/null`
fi
case "$uname" in
HP-UX*) SFLAGS=${CFLAGS-"-O +z"}
CFLAGS=${CFLAGS-"-O"}
# LDSHARED=${LDSHARED-"ld -b +vnocompatwarnings"}
LDSHARED=${LDSHARED-"ld -b"}
case `(uname -m || echo unknown) 2>/dev/null` in
ia64)
shared_ext='.so'
SHAREDLIB='libz.so' ;;
*)
shared_ext='.sl'
SHAREDLIB='libz.sl' ;;
esac ;;
IRIX*) SFLAGS=${CFLAGS-"-ansi -O2 -rpath ."}
CFLAGS=${CFLAGS-"-ansi -O2"}
LDSHARED=${LDSHARED-"cc -shared -Wl,-soname,libz.so.1"} ;;
OSF1\ V4*) SFLAGS=${CFLAGS-"-O -std1"}
CFLAGS=${CFLAGS-"-O -std1"}
LDFLAGS="${LDFLAGS} -Wl,-rpath,."
# The "$" of $(libdir) is escaped: unescaped inside double quotes it is a
# shell command substitution that tries to run a command named "libdir" and
# leaves the -rpath argument empty. Escaped, the literal text $(libdir) is
# stored in LDSHARED (presumably to be expanded later as a make variable --
# confirm against Makefile.in).
LDSHARED=${LDSHARED-"cc -shared -Wl,-soname,libz.so -Wl,-msym -Wl,-rpath,\$(libdir) -Wl,-set_version,${VER}:1.0"} ;;
OSF1*) SFLAGS=${CFLAGS-"-O -std1"}
CFLAGS=${CFLAGS-"-O -std1"}
LDSHARED=${LDSHARED-"cc -shared -Wl,-soname,libz.so.1"} ;;
QNX*) SFLAGS=${CFLAGS-"-4 -O"}
CFLAGS=${CFLAGS-"-4 -O"}
LDSHARED=${LDSHARED-"cc"}
RANLIB=${RANLIB-"true"}
AR="cc"
ARFLAGS="-A" ;;
SCO_SV\ 3.2*) SFLAGS=${CFLAGS-"-O3 -dy -KPIC "}
CFLAGS=${CFLAGS-"-O3"}
LDSHARED=${LDSHARED-"cc -dy -KPIC -G"} ;;
SunOS\ 5* | solaris*)
LDSHARED=${LDSHARED-"cc -G -h libz$shared_ext.$VER1"}
SFLAGS=${CFLAGS-"-fast -KPIC"}
CFLAGS=${CFLAGS-"-fast"}
if test $build64 -eq 1; then
# old versions of SunPRO/Workshop/Studio don't support -m64,
# but newer ones do. Check for it.
flag64=`$CC -flags | egrep -- '^-m64'`
if test x"$flag64" != x"" ; then
CFLAGS="${CFLAGS} -m64"
SFLAGS="${SFLAGS} -m64"
else
case `(uname -m || echo unknown) 2>/dev/null` in
i86*)
SFLAGS="$SFLAGS -xarch=amd64"
CFLAGS="$CFLAGS -xarch=amd64" ;;
*)
SFLAGS="$SFLAGS -xarch=v9"
CFLAGS="$CFLAGS -xarch=v9" ;;
esac
fi
fi
;;
SunOS\ 4*) SFLAGS=${CFLAGS-"-O2 -PIC"}
CFLAGS=${CFLAGS-"-O2"}
LDSHARED=${LDSHARED-"ld"} ;;
SunStudio\ 9*) SFLAGS=${CFLAGS-"-fast -xcode=pic32 -xtarget=ultra3 -xarch=v9b"}
CFLAGS=${CFLAGS-"-fast -xtarget=ultra3 -xarch=v9b"}
LDSHARED=${LDSHARED-"cc -xarch=v9b"} ;;
UNIX_System_V\ 4.2.0)
SFLAGS=${CFLAGS-"-KPIC -O"}
CFLAGS=${CFLAGS-"-O"}
LDSHARED=${LDSHARED-"cc -G"} ;;
UNIX_SV\ 4.2MP)
SFLAGS=${CFLAGS-"-Kconform_pic -O"}
CFLAGS=${CFLAGS-"-O"}
LDSHARED=${LDSHARED-"cc -G"} ;;
OpenUNIX\ 5)
SFLAGS=${CFLAGS-"-KPIC -O"}
CFLAGS=${CFLAGS-"-O"}
LDSHARED=${LDSHARED-"cc -G"} ;;
AIX*) # Courtesy of dbakker@arrayasolutions.com
SFLAGS=${CFLAGS-"-O -qmaxmem=8192"}
CFLAGS=${CFLAGS-"-O -qmaxmem=8192"}
LDSHARED=${LDSHARED-"xlc -G"} ;;
# send working options for other systems to zlib@gzip.org
*) SFLAGS=${CFLAGS-"-O"}
CFLAGS=${CFLAGS-"-O"}
LDSHARED=${LDSHARED-"cc -shared"} ;;
esac
fi
# destination names for shared library if not defined above
SHAREDLIB=${SHAREDLIB-"libz$shared_ext"}
SHAREDLIBV=${SHAREDLIBV-"libz$shared_ext.$VER"}
SHAREDLIBM=${SHAREDLIBM-"libz$shared_ext.$VER1"}
echo >> configure.log
# define functions for testing compiler and library characteristics and logging the results
#
# try() runs the command given as its arguments, logs it through show, and
# returns success or failure. Which of the two definitions below is used is
# decided by compiling a file that contains nothing but an #error directive.
cat > $test.c <<EOF
#error error
EOF
if ($CC -c $CFLAGS $test.c) 2>/dev/null; then
# The compiler reported success even for "#error", so its exit status cannot
# be used to detect failure. This try() succeeds only when the command
# writes nothing at all to stdout or stderr; whatever it does write is also
# appended to configure.log.
try()
{
show $*
test "`( $* ) 2>&1 | tee -a configure.log`" = ""
}
echo - using any output from compiler to indicate an error >> configure.log
else
# The compiler failed on "#error" as expected, so the exit status is used.
# This try() sends all command output to configure.log, notes a non-zero
# exit code there, and returns the command's own exit status.
try()
{
show $*
( $* ) >> configure.log 2>&1
ret=$?
if test $ret -ne 0; then
echo "(exit code "$ret")" >> configure.log
fi
return $ret
}
fi
tryboth()
{
  # Stricter than either try() variant: the command must exit with status 0
  # AND must produce no output on stdout or stderr.
  show $*
  got=`( $* ) 2>&1`
  ret=$?
  # keep whatever the command printed in the log
  printf %s "$got" >> configure.log
  # a failing exit status is passed straight back to the caller
  test $ret -eq 0 || return $ret
  # exit status was fine: succeed only if nothing was printed
  test "$got" = ""
}
cat > $test.c << EOF
int foo() { return 0; }
EOF
echo "Checking for obsessive-compulsive compiler options..." >> configure.log
if try $CC -c $CFLAGS $test.c; then
:
else
echo "Compiler error reporting is too harsh for $0 (perhaps remove -Werror)." | tee -a configure.log
leave 1
fi
echo >> configure.log
# see if shared library build supported
cat > $test.c <<EOF
extern int getchar();
int hello() {return getchar();}
EOF
if test $shared -eq 1; then
echo Checking for shared library support... | tee -a configure.log
# we must test in two steps (cc then ld), required at least on SunOS 4.x
if try $CC -w -c $SFLAGS $test.c &&
try $LDSHARED $SFLAGS -o $test$shared_ext $test.o; then
echo Building shared library $SHAREDLIBV with $CC. | tee -a configure.log
elif test -z "$old_cc" -a -z "$old_cflags"; then
echo No shared library support. | tee -a configure.log
shared=0;
else
echo 'No shared library support; try without defining CC and CFLAGS' | tee -a configure.log
shared=0;
fi
fi
if test $shared -eq 0; then
LDSHARED="$CC"
ALL="static"
TEST="all teststatic"
SHAREDLIB=""
SHAREDLIBV=""
SHAREDLIBM=""
echo Building static library $STATICLIB version $VER with $CC. | tee -a configure.log
else
ALL="static shared"
TEST="all teststatic testshared"
fi
# check for underscores in external names for use by assembler code
CPP=${CPP-"$CC -E"}
case $CFLAGS in
*ASMV*)
echo >> configure.log
show "$NM $test.o | grep _hello"
if test "`$NM $test.o | grep _hello | tee -a configure.log`" = ""; then
CPP="$CPP -DNO_UNDERLINE"
echo Checking for underline in external names... No. | tee -a configure.log
else
echo Checking for underline in external names... Yes. | tee -a configure.log
fi ;;
esac
echo >> configure.log
# check for large file support, and if none, check for fseeko()
cat > $test.c <<EOF
#include <sys/types.h>
off64_t dummy = 0;
EOF
if try $CC -c $CFLAGS -D_LARGEFILE64_SOURCE=1 $test.c; then
CFLAGS="${CFLAGS} -D_LARGEFILE64_SOURCE=1"
SFLAGS="${SFLAGS} -D_LARGEFILE64_SOURCE=1"
ALL="${ALL} all64"
TEST="${TEST} test64"
echo "Checking for off64_t... Yes." | tee -a configure.log
echo "Checking for fseeko... Yes." | tee -a configure.log
else
echo "Checking for off64_t... No." | tee -a configure.log
echo >> configure.log
cat > $test.c <<EOF
#include <stdio.h>
int main(void) {
fseeko(NULL, 0, 0);
return 0;
}
EOF
if try $CC $CFLAGS -o $test $test.c; then
echo "Checking for fseeko... Yes." | tee -a configure.log
else
CFLAGS="${CFLAGS} -DNO_FSEEKO"
SFLAGS="${SFLAGS} -DNO_FSEEKO"
echo "Checking for fseeko... No." | tee -a configure.log
fi
fi
echo >> configure.log
# check for strerror() for use by gz* functions
cat > $test.c <<EOF
#include <string.h>
#include <errno.h>
int main() { return strlen(strerror(errno)); }
EOF
if try $CC $CFLAGS -o $test $test.c; then
echo "Checking for strerror... Yes." | tee -a configure.log
else
CFLAGS="${CFLAGS} -DNO_STRERROR"
SFLAGS="${SFLAGS} -DNO_STRERROR"
echo "Checking for strerror... No." | tee -a configure.log
fi
# copy clean zconf.h for subsequent edits
cp -p zconf.h.in zconf.h
echo >> configure.log
# check for unistd.h and save result in zconf.h
cat > $test.c <<EOF
#include <unistd.h>
int main() { return 0; }
EOF
if try $CC -c $CFLAGS $test.c; then
sed < zconf.h "/^#ifdef HAVE_UNISTD_H.* may be/s/def HAVE_UNISTD_H\(.*\) may be/ 1\1 was/" > zconf.temp.h
mv zconf.temp.h zconf.h
echo "Checking for unistd.h... Yes." | tee -a configure.log
else
echo "Checking for unistd.h... No." | tee -a configure.log
fi
echo >> configure.log
# check for stdarg.h and save result in zconf.h
cat > $test.c <<EOF
#include <stdarg.h>
int main() { return 0; }
EOF
if try $CC -c $CFLAGS $test.c; then
sed < zconf.h "/^#ifdef HAVE_STDARG_H.* may be/s/def HAVE_STDARG_H\(.*\) may be/ 1\1 was/" > zconf.temp.h
mv zconf.temp.h zconf.h
echo "Checking for stdarg.h... Yes." | tee -a configure.log
else
echo "Checking for stdarg.h... No." | tee -a configure.log
fi
# if the z_ prefix was requested, save that in zconf.h
if test $zprefix -eq 1; then
sed < zconf.h "/#ifdef Z_PREFIX.* may be/s/def Z_PREFIX\(.*\) may be/ 1\1 was/" > zconf.temp.h
mv zconf.temp.h zconf.h
echo >> configure.log
echo "Using z_ prefix on all symbols." | tee -a configure.log
fi
# if --solo compilation was requested, save that in zconf.h and remove gz stuff from object lists
if test $solo -eq 1; then
sed '/#define ZCONF_H/a\
#define Z_SOLO
' < zconf.h > zconf.temp.h
mv zconf.temp.h zconf.h
OBJC='$(OBJZ)'
PIC_OBJC='$(PIC_OBJZ)'
fi
# if code coverage testing was requested, use older gcc if defined, e.g. "gcc-4.2" on Mac OS X
if test $cover -eq 1; then
CFLAGS="${CFLAGS} -fprofile-arcs -ftest-coverage"
if test -n "$GCC_CLASSIC"; then
CC=$GCC_CLASSIC
fi
fi
echo >> configure.log
# conduct a series of tests to resolve eight possible cases of using "vs" or "s" printf functions
# (using stdarg or not), with or without "n" (providing the size of the buffer), and with or without a
# return value. The most secure result is vsnprintf() with a return value. snprintf() with a
# return value is secure as well, but then gzprintf() will be limited to 20 arguments.
cat > $test.c <<EOF
#include <stdio.h>
#include <stdarg.h>
#include "zconf.h"
int main()
{
#ifndef STDC
choke me
#endif
return 0;
}
EOF
if try $CC -c $CFLAGS $test.c; then
echo "Checking whether to use vs[n]printf() or s[n]printf()... using vs[n]printf()." | tee -a configure.log
echo >> configure.log
cat > $test.c <<EOF
#include <stdio.h>
#include <stdarg.h>
int mytest(const char *fmt, ...)
{
char buf[20];
va_list ap;
va_start(ap, fmt);
vsnprintf(buf, sizeof(buf), fmt, ap);
va_end(ap);
return 0;
}
int main()
{
return (mytest("Hello%d\n", 1));
}
EOF
if try $CC $CFLAGS -o $test $test.c; then
echo "Checking for vsnprintf() in stdio.h... Yes." | tee -a configure.log
echo >> configure.log
cat >$test.c <<EOF
#include <stdio.h>
#include <stdarg.h>
int mytest(const char *fmt, ...)
{
int n;
char buf[20];
va_list ap;
va_start(ap, fmt);
n = vsnprintf(buf, sizeof(buf), fmt, ap);
va_end(ap);
return n;
}
int main()
{
return (mytest("Hello%d\n", 1));
}
EOF
if try $CC -c $CFLAGS $test.c; then
echo "Checking for return value of vsnprintf()... Yes." | tee -a configure.log
else
CFLAGS="$CFLAGS -DHAS_vsnprintf_void"
SFLAGS="$SFLAGS -DHAS_vsnprintf_void"
echo "Checking for return value of vsnprintf()... No." | tee -a configure.log
echo " WARNING: apparently vsnprintf() does not return a value. zlib" | tee -a configure.log
echo " can build but will be open to possible string-format security" | tee -a configure.log
echo " vulnerabilities." | tee -a configure.log
fi
else
CFLAGS="$CFLAGS -DNO_vsnprintf"
SFLAGS="$SFLAGS -DNO_vsnprintf"
echo "Checking for vsnprintf() in stdio.h... No." | tee -a configure.log
echo " WARNING: vsnprintf() not found, falling back to vsprintf(). zlib" | tee -a configure.log
echo " can build but will be open to possible buffer-overflow security" | tee -a configure.log
echo " vulnerabilities." | tee -a configure.log
echo >> configure.log
cat >$test.c <<EOF
#include <stdio.h>
#include <stdarg.h>
int mytest(const char *fmt, ...)
{
int n;
char buf[20];
va_list ap;
va_start(ap, fmt);
n = vsprintf(buf, fmt, ap);
va_end(ap);
return n;
}
int main()
{
return (mytest("Hello%d\n", 1));
}
EOF
if try $CC -c $CFLAGS $test.c; then
echo "Checking for return value of vsprintf()... Yes." | tee -a configure.log
else
CFLAGS="$CFLAGS -DHAS_vsprintf_void"
SFLAGS="$SFLAGS -DHAS_vsprintf_void"
echo "Checking for return value of vsprintf()... No." | tee -a configure.log
echo " WARNING: apparently vsprintf() does not return a value. zlib" | tee -a configure.log
echo " can build but will be open to possible string-format security" | tee -a configure.log
echo " vulnerabilities." | tee -a configure.log
fi
fi
else
echo "Checking whether to use vs[n]printf() or s[n]printf()... using s[n]printf()." | tee -a configure.log
echo >> configure.log
cat >$test.c <<EOF
#include <stdio.h>
int mytest()
{
char buf[20];
snprintf(buf, sizeof(buf), "%s", "foo");
return 0;
}
int main()
{
return (mytest());
}
EOF
if try $CC $CFLAGS -o $test $test.c; then
echo "Checking for snprintf() in stdio.h... Yes." | tee -a configure.log
echo >> configure.log
cat >$test.c <<EOF
#include <stdio.h>
int mytest()
{
char buf[20];
return snprintf(buf, sizeof(buf), "%s", "foo");
}
int main()
{
return (mytest());
}
EOF
if try $CC -c $CFLAGS $test.c; then
echo "Checking for return value of snprintf()... Yes." | tee -a configure.log
else
CFLAGS="$CFLAGS -DHAS_snprintf_void"
SFLAGS="$SFLAGS -DHAS_snprintf_void"
echo "Checking for return value of snprintf()... No." | tee -a configure.log
echo " WARNING: apparently snprintf() does not return a value. zlib" | tee -a configure.log
echo " can build but will be open to possible string-format security" | tee -a configure.log
echo " vulnerabilities." | tee -a configure.log
fi
else
CFLAGS="$CFLAGS -DNO_snprintf"
SFLAGS="$SFLAGS -DNO_snprintf"
echo "Checking for snprintf() in stdio.h... No." | tee -a configure.log
echo " WARNING: snprintf() not found, falling back to sprintf(). zlib" | tee -a configure.log
echo " can build but will be open to possible buffer-overflow security" | tee -a configure.log
echo " vulnerabilities." | tee -a configure.log
echo >> configure.log
cat >$test.c <<EOF
#include <stdio.h>
int mytest()
{
char buf[20];
return sprintf(buf, "%s", "foo");
}
int main()
{
return (mytest());
}
EOF
if try $CC -c $CFLAGS $test.c; then
echo "Checking for return value of sprintf()... Yes." | tee -a configure.log
else
CFLAGS="$CFLAGS -DHAS_sprintf_void"
SFLAGS="$SFLAGS -DHAS_sprintf_void"
echo "Checking for return value of sprintf()... No." | tee -a configure.log
echo " WARNING: apparently sprintf() does not return a value. zlib" | tee -a configure.log
echo " can build but will be open to possible string-format security" | tee -a configure.log
echo " vulnerabilities." | tee -a configure.log
fi
fi
fi
# Symbol-hiding probe, run only when $gcc is 1: check whether the compiler
# accepts __attribute__((visibility ("hidden"))), so that zlib-internal
# symbols linked between separate source files can be hidden.
if [ "$gcc" -eq 1 ]; then
  echo >> configure.log

  # Minimal program declaring one hidden-visibility variable.  Here-doc text
  # must stay in column 0 (plain <<EOF).
  cat > $test.c <<EOF
#define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
int ZLIB_INTERNAL foo;
int main()
{
return 0;
}
EOF

  # When the probe succeeds, add -DHAVE_HIDDEN to both CFLAGS and SFLAGS;
  # otherwise only report the negative result.
  if tryboth $CC -c $CFLAGS $test.c; then
    CFLAGS="$CFLAGS -DHAVE_HIDDEN"
    SFLAGS="$SFLAGS -DHAVE_HIDDEN"
    echo "Checking for attribute(visibility) support... Yes." | tee -a configure.log
  else
    echo "Checking for attribute(visibility) support... No." | tee -a configure.log
  fi
fi
# Append a blank separator line followed by the final value of every
# configuration variable to configure.log.  The whole group shares a single
# append redirection.  Expansions are deliberately left unquoted so each
# value is word-split by the shell before echo prints it.
{
  echo
  echo ALL = $ALL
  echo AR = $AR
  echo ARFLAGS = $ARFLAGS
  echo CC = $CC
  echo CFLAGS = $CFLAGS
  echo CPP = $CPP
  echo EXE = $EXE
  echo LDCONFIG = $LDCONFIG
  echo LDFLAGS = $LDFLAGS
  echo LDSHARED = $LDSHARED
  echo LDSHAREDLIBC = $LDSHAREDLIBC
  echo OBJC = $OBJC
  echo PIC_OBJC = $PIC_OBJC
  echo RANLIB = $RANLIB
  echo SFLAGS = $SFLAGS
  echo SHAREDLIB = $SHAREDLIB
  echo SHAREDLIBM = $SHAREDLIBM
  echo SHAREDLIBV = $SHAREDLIBV
  echo STATICLIB = $STATICLIB
  echo TEST = $TEST
  echo VER = $VER
  echo Z_U4 = $Z_U4
  echo exec_prefix = $exec_prefix
  echo includedir = $includedir
  echo libdir = $libdir
  echo mandir = $mandir
  echo prefix = $prefix
  echo sharedlibdir = $sharedlibdir
  echo uname = $uname
} >> configure.log
# update Makefile with the configure results
#
# Makefile.in is filtered through sed and written to Makefile.  The sed
# program is double-quoted, so the shell expands every $VAR before sed runs.
# For each line starting with one of the listed names followed by optional
# spaces and '=', everything from the first '=' to the end of the line is
# replaced by '=' plus the configured value (OBJC and PIC_OBJC get a space
# after the '=').  The "all:" and "test:" lines have everything from the ':'
# onward replaced by ": $ALL" and ": $TEST".
#
# NOTE(review): '#' is the s-command delimiter, so a value that contains '#'
# (or sed replacement metacharacters such as '&' or '\') would corrupt the
# substitution -- confirm that no configured value can contain them.
sed < Makefile.in "
/^CC *=/s#=.*#=$CC#
/^CFLAGS *=/s#=.*#=$CFLAGS#
/^SFLAGS *=/s#=.*#=$SFLAGS#
/^LDFLAGS *=/s#=.*#=$LDFLAGS#
/^LDSHARED *=/s#=.*#=$LDSHARED#
/^CPP *=/s#=.*#=$CPP#
/^STATICLIB *=/s#=.*#=$STATICLIB#
/^SHAREDLIB *=/s#=.*#=$SHAREDLIB#
/^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV#
/^SHAREDLIBM *=/s#=.*#=$SHAREDLIBM#
/^AR *=/s#=.*#=$AR#
/^ARFLAGS *=/s#=.*#=$ARFLAGS#
/^RANLIB *=/s#=.*#=$RANLIB#
/^LDCONFIG *=/s#=.*#=$LDCONFIG#
/^LDSHAREDLIBC *=/s#=.*#=$LDSHAREDLIBC#
/^EXE *=/s#=.*#=$EXE#
/^prefix *=/s#=.*#=$prefix#
/^exec_prefix *=/s#=.*#=$exec_prefix#
/^libdir *=/s#=.*#=$libdir#
/^sharedlibdir *=/s#=.*#=$sharedlibdir#
/^includedir *=/s#=.*#=$includedir#
/^mandir *=/s#=.*#=$mandir#
/^OBJC *=/s#=.*#= $OBJC#
/^PIC_OBJC *=/s#=.*#= $PIC_OBJC#
/^all: */s#:.*#: $ALL#
/^test: */s#:.*#: $TEST#
" > Makefile
# create zlib.pc with the configure results
#
# Two sed passes turn zlib.pc.in into zlib.pc:
#   1. For each line starting with one of the listed names followed by
#      optional spaces and '=', everything from the first '=' to the end of
#      the line is replaced by '=' plus the configured value.
#   2. Every occurrence of @VERSION@ is replaced by $VER.
# Both sed programs are double-quoted, so the shell expands the variables
# before sed sees the text.
#
# NOTE(review): '#' (pass 1) and '/' (pass 2) are the s-command delimiters;
# a value containing its pass's delimiter would break that substitution --
# confirm that configured values cannot contain them.
sed < zlib.pc.in "
/^CC *=/s#=.*#=$CC#
/^CFLAGS *=/s#=.*#=$CFLAGS#
/^CPP *=/s#=.*#=$CPP#
/^LDSHARED *=/s#=.*#=$LDSHARED#
/^STATICLIB *=/s#=.*#=$STATICLIB#
/^SHAREDLIB *=/s#=.*#=$SHAREDLIB#
/^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV#
/^SHAREDLIBM *=/s#=.*#=$SHAREDLIBM#
/^AR *=/s#=.*#=$AR#
/^ARFLAGS *=/s#=.*#=$ARFLAGS#
/^RANLIB *=/s#=.*#=$RANLIB#
/^EXE *=/s#=.*#=$EXE#
/^prefix *=/s#=.*#=$prefix#
/^exec_prefix *=/s#=.*#=$exec_prefix#
/^libdir *=/s#=.*#=$libdir#
/^sharedlibdir *=/s#=.*#=$sharedlibdir#
/^includedir *=/s#=.*#=$includedir#
/^mandir *=/s#=.*#=$mandir#
/^LDFLAGS *=/s#=.*#=$LDFLAGS#
" | sed -e "
s/\@VERSION\@/$VER/g;
" > zlib.pc
# done
# Hand control to the script's leave helper with status 0.
# NOTE(review): leave is defined outside this chunk; presumably it cleans up
# the temporary probe files and exits with the given status -- confirm.
leave 0

Some files were not shown because too many files have changed in this diff Show More