AppPkg/Applications/Python: Add Python 2.7.2 sources since the release of Python 2.7.3 made them unavailable from the python.org web site.

These files are a subset of the python-2.7.2.tgz distribution from python.org.  Changed files from PyMod-2.7.2 have been copied into the corresponding directories of this tree, replacing the original files in the distribution.

Signed-off-by: daryl.mcdaniel@intel.com


git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@13197 6f19259b-4bc3-4df7-8a09-765794883524
This commit is contained in:
darylm503
2012-04-16 22:12:42 +00:00
parent cbc6b5e545
commit 4710c53dca
2106 changed files with 871583 additions and 0 deletions

View File

@@ -0,0 +1,79 @@
To generate or modify mapping headers
-------------------------------------
Mapping headers are imported from CJKCodecs as pre-generated form.
If you need to tweak or add something on it, please look at tools/
subdirectory of CJKCodecs' distribution.
Notes on implmentation characteristics of each codecs
-----------------------------------------------------
1) Big5 codec
The big5 codec maps the following characters as cp950 does rather
than conforming Unicode.org's that maps to 0xFFFD.
BIG5 Unicode Description
0xA15A 0x2574 SPACING UNDERSCORE
0xA1C3 0xFFE3 SPACING HEAVY OVERSCORE
0xA1C5 0x02CD SPACING HEAVY UNDERSCORE
0xA1FE 0xFF0F LT DIAG UP RIGHT TO LOW LEFT
0xA240 0xFF3C LT DIAG UP LEFT TO LOW RIGHT
0xA2CC 0x5341 HANGZHOU NUMERAL TEN
0xA2CE 0x5345 HANGZHOU NUMERAL THIRTY
Because unicode 0x5341, 0x5345, 0xFF0F, 0xFF3C is mapped to another
big5 codes already, a roundtrip compatibility is not guaranteed for
them.
2) cp932 codec
To conform to Windows's real mapping, cp932 codec maps the following
codepoints in addition of the official cp932 mapping.
CP932 Unicode Description
0x80 0x80 UNDEFINED
0xA0 0xF8F0 UNDEFINED
0xFD 0xF8F1 UNDEFINED
0xFE 0xF8F2 UNDEFINED
0xFF 0xF8F3 UNDEFINED
3) euc-jisx0213 codec
The euc-jisx0213 codec maps JIS X 0213 Plane 1 code 0x2140 into
unicode U+FF3C instead of U+005C as on unicode.org's mapping.
Because euc-jisx0213 has REVERSE SOLIDUS on 0x5c already and A140
is shown as a full width character, mapping to U+FF3C can make
more sense.
The euc-jisx0213 codec is enabled to decode JIS X 0212 codes on
codeset 2. Because JIS X 0212 and JIS X 0213 Plane 2 don't have
overlapped by each other, it doesn't bother standard conformations
(and JIS X 0213 Plane 2 is intended to use so.) On encoding
sessions, the codec will try to encode kanji characters in this
order:
JIS X 0213 Plane 1 -> JIS X 0213 Plane 2 -> JIS X 0212
4) euc-jp codec
The euc-jp codec is a compatibility instance on these points:
- U+FF3C FULLWIDTH REVERSE SOLIDUS is mapped to EUC-JP A1C0 (vice versa)
- U+00A5 YEN SIGN is mapped to EUC-JP 0x5c. (one way)
- U+203E OVERLINE is mapped to EUC-JP 0x7e. (one way)
5) shift-jis codec
The shift-jis codec is mapping 0x20-0x7e area to U+20-U+7E directly
instead of using JIS X 0201 for compatibility. The differences are:
- U+005C REVERSE SOLIDUS is mapped to SHIFT-JIS 0x5c.
- U+007E TILDE is mapped to SHIFT-JIS 0x7e.
- U+FF3C FULL-WIDTH REVERSE SOLIDUS is mapped to SHIFT-JIS 815f.

View File

@@ -0,0 +1,444 @@
/*
* _codecs_cn.c: Codecs collection for Mainland Chinese encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#include "cjkcodecs.h"
#include "mappings_cn.h"
/**
* hz is predefined as 100 on AIX. So we undefine it to avoid
* conflict against hz codec's.
*/
#ifdef _AIX
#undef hz
#endif
/* GBK and GB2312 map differently in few codepoints that are listed below:
*
* gb2312 gbk
* A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT
* A1AA U+2015 HORIZONTAL BAR U+2014 EM DASH
* A844 undefined U+2015 HORIZONTAL BAR
*/
#define GBK_DECODE(dc1, dc2, assi) \
if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \
else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \
else TRYMAP_DEC(gbkext, assi, dc1, dc2);
#define GBK_ENCODE(code, assi) \
if ((code) == 0x2014) (assi) = 0xa1aa; \
else if ((code) == 0x2015) (assi) = 0xa844; \
else if ((code) == 0x00b7) (assi) = 0xa1a4; \
else if ((code) != 0x30fb && TRYMAP_ENC_COND(gbcommon, assi, code));
/*
* GB2312 codec
*/
ENCODER(gb2312)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(gbcommon, code, c);
else return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;
OUT1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80)
NEXT(1, 2)
}
return 0;
}
DECODER(gb2312)
{
while (inleft > 0) {
unsigned char c = **inbuf;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
}
else return 2;
}
return 0;
}
/*
* GBK codec
*/
ENCODER(gbk)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
GBK_ENCODE(c, code)
else return 1;
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2)
}
return 0;
}
DECODER(gbk)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
GBK_DECODE(c, IN2, **outbuf)
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
* GB18030 codec
*/
ENCODER(gb18030)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1(c)
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
if (c > 0x10FFFF)
#if Py_UNICODE_SIZE == 2
return 2; /* surrogates pair */
#else
return 1;
#endif
else if (c >= 0x10000) {
ucs4_t tc = c - 0x10000;
REQUIRE_OUTBUF(4)
OUT4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT1((unsigned char)(tc + 0x90))
#if Py_UNICODE_SIZE == 2
NEXT(2, 4) /* surrogates pair */
#else
NEXT(1, 4)
#endif
continue;
}
REQUIRE_OUTBUF(2)
GBK_ENCODE(c, code)
else TRYMAP_ENC(gb18030ext, code, c);
else {
const struct _gb18030_to_unibmp_ranges *utrrange;
REQUIRE_OUTBUF(4)
for (utrrange = gb18030_to_unibmp_ranges;
utrrange->first != 0;
utrrange++)
if (utrrange->first <= c &&
c <= utrrange->last) {
Py_UNICODE tc;
tc = c - utrrange->first +
utrrange->base;
OUT4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT1((unsigned char)tc + 0x81)
NEXT(1, 4)
break;
}
if (utrrange->first == 0)
return 1;
continue;
}
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2)
}
return 0;
}
DECODER(gb18030)
{
while (inleft > 0) {
unsigned char c = IN1, c2;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
const struct _gb18030_to_unibmp_ranges *utr;
unsigned char c3, c4;
ucs4_t lseq;
REQUIRE_INBUF(4)
c3 = IN3;
c4 = IN4;
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
return 4;
c -= 0x81; c2 -= 0x30;
c3 -= 0x81; c4 -= 0x30;
if (c < 4) { /* U+0080 - U+FFFF */
lseq = ((ucs4_t)c * 10 + c2) * 1260 +
(ucs4_t)c3 * 10 + c4;
if (lseq < 39420) {
for (utr = gb18030_to_unibmp_ranges;
lseq >= (utr + 1)->base;
utr++) ;
OUT1(utr->first - utr->base + lseq)
NEXT(4, 1)
continue;
}
}
else if (c >= 15) { /* U+10000 - U+10FFFF */
lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
* 1260 + (ucs4_t)c3 * 10 + c4;
if (lseq <= 0x10FFFF) {
WRITEUCS4(lseq);
NEXT_IN(4)
continue;
}
}
return 4;
}
GBK_DECODE(c, c2, **outbuf)
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
* HZ codec
*/
ENCODER_INIT(hz)
{
state->i = 0;
return 0;
}
ENCODER_RESET(hz)
{
if (state->i != 0) {
WRITE2('~', '}')
state->i = 0;
NEXT_OUT(2)
}
return 0;
}
ENCODER(hz)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
if (state->i == 0) {
WRITE1((unsigned char)c)
NEXT(1, 1)
}
else {
WRITE3('~', '}', (unsigned char)c)
NEXT(1, 3)
state->i = 0;
}
continue;
}
UCS4INVALID(c)
TRYMAP_ENC(gbcommon, code, c);
else return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;
if (state->i == 0) {
WRITE4('~', '{', code >> 8, code & 0xff)
NEXT(1, 4)
state->i = 1;
}
else {
WRITE2(code >> 8, code & 0xff)
NEXT(1, 2)
}
}
return 0;
}
DECODER_INIT(hz)
{
state->i = 0;
return 0;
}
DECODER_RESET(hz)
{
state->i = 0;
return 0;
}
DECODER(hz)
{
while (inleft > 0) {
unsigned char c = IN1;
if (c == '~') {
unsigned char c2 = IN2;
REQUIRE_INBUF(2)
if (c2 == '~') {
WRITE1('~')
NEXT(2, 1)
continue;
}
else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */
else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */
else if (c2 == '\n')
; /* line-continuation */
else
return 2;
NEXT(2, 0);
continue;
}
if (c & 0x80)
return 1;
if (state->i == 0) { /* ASCII mode */
WRITE1(c)
NEXT(1, 1)
}
else { /* GB mode */
REQUIRE_INBUF(2)
REQUIRE_OUTBUF(1)
TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
NEXT(2, 1)
}
else
return 2;
}
}
return 0;
}
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(gb2312)
MAPPING_DECONLY(gbkext)
MAPPING_ENCONLY(gbcommon)
MAPPING_ENCDEC(gb18030ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(gb2312)
CODEC_STATELESS(gbk)
CODEC_STATELESS(gb18030)
CODEC_STATEFUL(hz)
END_CODECS_LIST
I_AM_A_MODULE_FOR(cn)

View File

@@ -0,0 +1,183 @@
/*
* _codecs_hk.c: Codecs collection for encodings from Hong Kong
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#define USING_IMPORTED_MAPS
#include "cjkcodecs.h"
#include "mappings_hk.h"
/*
* BIG5HKSCS codec
*/
static const encode_map *big5_encmap = NULL;
static const decode_map *big5_decmap = NULL;
CODEC_INIT(big5hkscs)
{
static int initialized = 0;
if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
return -1;
initialized = 1;
return 0;
}
/*
* There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
* U+00CA U+0304 -> 8862 (U+00CA alone is mapped to 8866)
* U+00CA U+030C -> 8864
* U+00EA U+0304 -> 88a3 (U+00EA alone is mapped to 88a7)
* U+00EA U+030C -> 88a5
* These are handled by not mapping tables but a hand-written code.
*/
static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
ENCODER(big5hkscs)
{
while (inleft > 0) {
ucs4_t c = **inbuf;
DBCHAR code;
Py_ssize_t insize;
if (c < 0x80) {
REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c;
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
insize = GET_INSIZE(c);
REQUIRE_OUTBUF(2)
if (c < 0x10000) {
TRYMAP_ENC(big5hkscs_bmp, code, c) {
if (code == MULTIC) {
if (inleft >= 2 &&
((c & 0xffdf) == 0x00ca) &&
(((*inbuf)[1] & 0xfff7) == 0x0304)) {
code = big5hkscs_pairenc_table[
((c >> 4) |
((*inbuf)[1] >> 3)) & 3];
insize = 2;
}
else if (inleft < 2 &&
!(flags & MBENC_FLUSH))
return MBERR_TOOFEW;
else {
if (c == 0xca)
code = 0x8866;
else /* c == 0xea */
code = 0x88a7;
}
}
}
else TRYMAP_ENC(big5, code, c);
else return 1;
}
else if (c < 0x20000)
return insize;
else if (c < 0x30000) {
TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
else return insize;
}
else
return insize;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(insize, 2)
}
return 0;
}
#define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
DECODER(big5hkscs)
{
while (inleft > 0) {
unsigned char c = IN1;
ucs4_t decoded;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
goto hkscsdec;
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
}
else
hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
int s = BH2S(c, IN2);
const unsigned char *hintbase;
assert(0x87 <= c && c <= 0xfe);
assert(0x40 <= IN2 && IN2 <= 0xfe);
if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
hintbase = big5hkscs_phint_0;
s -= BH2S(0x87, 0x40);
}
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
hintbase = big5hkscs_phint_12130;
s -= BH2S(0xc6, 0xa1);
}
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
hintbase = big5hkscs_phint_21924;
s -= BH2S(0xf9, 0xd6);
}
else
return MBERR_INTERNAL;
if (hintbase[s >> 3] & (1 << (s & 7))) {
WRITEUCS4(decoded | 0x20000)
NEXT_IN(2)
}
else {
OUT1(decoded)
NEXT(2, 1)
}
}
else {
switch ((c << 8) | IN2) {
case 0x8862: WRITE2(0x00ca, 0x0304); break;
case 0x8864: WRITE2(0x00ca, 0x030c); break;
case 0x88a3: WRITE2(0x00ea, 0x0304); break;
case 0x88a5: WRITE2(0x00ea, 0x030c); break;
default: return 2;
}
NEXT(2, 2) /* all decoded codepoints are pairs, above. */
}
}
return 0;
}
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(big5hkscs)
MAPPING_ENCONLY(big5hkscs_bmp)
MAPPING_ENCONLY(big5hkscs_nonbmp)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS_WINIT(big5hkscs)
END_CODECS_LIST
I_AM_A_MODULE_FOR(hk)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,731 @@
/*
* _codecs_jp.c: Codecs collection for Japanese encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#define USING_BINARY_PAIR_SEARCH
#define EMPBASE 0x20000
#include "cjkcodecs.h"
#include "mappings_jp.h"
#include "mappings_jisx0213_pair.h"
#include "alg_jisx0201.h"
#include "emu_jisx0213_2000.h"
/*
* CP932 codec
*/
ENCODER(cp932)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
unsigned char c1, c2;
if (c <= 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
else if (c >= 0xff61 && c <= 0xff9f) {
WRITE1(c - 0xfec0)
NEXT(1, 1)
continue;
}
else if (c >= 0xf8f0 && c <= 0xf8f3) {
/* Windows compatibility */
REQUIRE_OUTBUF(1)
if (c == 0xf8f0)
OUT1(0xa0)
else
OUT1(c - 0xfef1 + 0xfd)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp932ext, code, c) {
OUT1(code >> 8)
OUT2(code & 0xff)
}
else TRYMAP_ENC(jisxcommon, code, c) {
if (code & 0x8000) /* MSB set: JIS X 0212 */
return 1;
/* JIS X 0208 */
c1 = code >> 8;
c2 = code & 0xff;
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
c1 = (c1 - 0x21) >> 1;
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
}
else if (c >= 0xe000 && c < 0xe758) {
/* User-defined area */
c1 = (Py_UNICODE)(c - 0xe000) / 188;
c2 = (Py_UNICODE)(c - 0xe000) % 188;
OUT1(c1 + 0xf0)
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
}
else
return 1;
NEXT(1, 2)
}
return 0;
}
DECODER(cp932)
{
while (inleft > 0) {
unsigned char c = IN1, c2;
REQUIRE_OUTBUF(1)
if (c <= 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
else if (c >= 0xa0 && c <= 0xdf) {
if (c == 0xa0)
OUT1(0xf8f0) /* half-width katakana */
else
OUT1(0xfec0 + c)
NEXT(1, 1)
continue;
}
else if (c >= 0xfd/* && c <= 0xff*/) {
/* Windows compatibility */
OUT1(0xf8f1 - 0xfd + c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
c2 = IN2;
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 2;
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else return 2;
}
else if (c >= 0xf0 && c <= 0xf9) {
if ((c2 >= 0x40 && c2 <= 0x7e) ||
(c2 >= 0x80 && c2 <= 0xfc))
OUT1(0xe000 + 188 * (c - 0xf0) +
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
else
return 2;
}
else
return 2;
NEXT(2, 1)
}
return 0;
}
/*
* EUC-JIS-2004 codec
*/
ENCODER(euc_jis_2004)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code;
Py_ssize_t insize;
if (c < 0x80) {
WRITE1(c)
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
insize = GET_INSIZE(c);
if (c <= 0xFFFF) {
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else TRYMAP_ENC(jisx0213_bmp, code, c) {
if (code == MULTIC) {
if (inleft < 2) {
if (flags & MBENC_FLUSH) {
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
return MBERR_TOOFEW;
}
else {
code = find_pairencmap(
(ucs2_t)c, (*inbuf)[1],
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
insize = 2;
}
}
}
else TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITE2(0x8e, c - 0xfec0)
NEXT(1, 2)
continue;
}
else if (c == 0xff3c)
/* F/W REVERSE SOLIDUS (see NOTES) */
code = 0x2140;
else if (c == 0xff5e)
/* F/W TILDE (see NOTES) */
code = 0x2232;
else
return 1;
}
else if (c >> 16 == EMPBASE >> 16) {
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
else return insize;
}
else
return insize;
if (code & 0x8000) {
/* Codeset 2 */
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
NEXT(insize, 3)
} else {
/* Codeset 1 */
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
NEXT(insize, 2)
}
}
return 0;
}
DECODER(euc_jis_2004)
{
while (inleft > 0) {
unsigned char c = IN1;
ucs4_t code;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)
NEXT(2, 1)
}
else
return 2;
}
else if (c == 0x8f) {
unsigned char c2, c3;
REQUIRE_INBUF(3)
c2 = IN2 ^ 0x80;
c3 = IN3 ^ 0x80;
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
WRITEUCS4(EMPBASE | code)
NEXT_IN(3)
continue;
}
else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
else return 3;
NEXT(3, 1)
}
else {
unsigned char c2;
REQUIRE_INBUF(2)
c ^= 0x80;
c2 = IN2 ^ 0x80;
/* JIS X 0213 Plane 1 */
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
WRITEUCS4(EMPBASE | code)
NEXT_IN(2)
continue;
}
else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
WRITE2(code >> 16, code & 0xffff)
NEXT(2, 2)
continue;
}
else return 2;
NEXT(2, 1)
}
}
return 0;
}
/*
* EUC-JP codec
*/
ENCODER(euc_jp)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITE2(0x8e, c - 0xfec0)
NEXT(1, 2)
continue;
}
#ifndef STRICT_BUILD
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
else if (c == 0xa5) { /* YEN SIGN */
WRITE1(0x5c);
NEXT(1, 1)
continue;
} else if (c == 0x203e) { /* OVERLINE */
WRITE1(0x7e);
NEXT(1, 1)
continue;
}
#endif
else
return 1;
if (code & 0x8000) {
/* JIS X 0212 */
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
NEXT(1, 3)
} else {
/* JIS X 0208 */
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
NEXT(1, 2)
}
}
return 0;
}
DECODER(euc_jp)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)
NEXT(2, 1)
}
else
return 2;
}
else if (c == 0x8f) {
unsigned char c2, c3;
REQUIRE_INBUF(3)
c2 = IN2;
c3 = IN3;
/* JIS X 0212 */
TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
NEXT(3, 1)
}
else
return 3;
}
else {
unsigned char c2;
REQUIRE_INBUF(2)
c2 = IN2;
/* JIS X 0208 */
#ifndef STRICT_BUILD
if (c == 0xa1 && c2 == 0xc0)
/* FULL-WIDTH REVERSE SOLIDUS */
**outbuf = 0xff3c;
else
#endif
TRYMAP_DEC(jisx0208, **outbuf,
c ^ 0x80, c2 ^ 0x80) ;
else return 2;
NEXT(2, 1)
}
}
return 0;
}
/*
* SHIFT_JIS codec
*/
ENCODER(shift_jis)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
unsigned char c1, c2;
#ifdef STRICT_BUILD
JISX0201_R_ENCODE(c, code)
#else
if (c < 0x80) code = c;
else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
else if (c == 0x203e) code = 0x7e; /* OVERLINE */
#endif
else JISX0201_K_ENCODE(c, code)
else UCS4INVALID(c)
else code = NOCHAR;
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
REQUIRE_OUTBUF(1)
OUT1((unsigned char)code)
NEXT(1, 1)
continue;
}
REQUIRE_OUTBUF(2)
if (code == NOCHAR) {
TRYMAP_ENC(jisxcommon, code, c);
#ifndef STRICT_BUILD
else if (c == 0xff3c)
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
#endif
else
return 1;
if (code & 0x8000) /* MSB set: JIS X 0212 */
return 1;
}
c1 = code >> 8;
c2 = code & 0xff;
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
c1 = (c1 - 0x21) >> 1;
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
NEXT(1, 2)
}
return 0;
}
DECODER(shift_jis)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
#ifdef STRICT_BUILD
JISX0201_R_DECODE(c, **outbuf)
#else
if (c < 0x80) **outbuf = c;
#endif
else JISX0201_K_DECODE(c, **outbuf)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
unsigned char c1, c2;
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 2;
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
#ifndef STRICT_BUILD
if (c1 == 0x21 && c2 == 0x40) {
/* FULL-WIDTH REVERSE SOLIDUS */
OUT1(0xff3c)
NEXT(2, 1)
continue;
}
#endif
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
NEXT(2, 1)
continue;
}
else
return 2;
}
else
return 2;
NEXT(1, 1) /* JIS X 0201 */
}
return 0;
}
/*
* SHIFT_JIS-2004 codec
*/
ENCODER(shift_jis_2004)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code = NOCHAR;
int c1, c2;
Py_ssize_t insize;
JISX0201_ENCODE(c, code)
else DECODE_SURROGATE(c)
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
WRITE1((unsigned char)code)
NEXT(1, 1)
continue;
}
REQUIRE_OUTBUF(2)
insize = GET_INSIZE(c);
if (code == NOCHAR) {
if (c <= 0xffff) {
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else TRYMAP_ENC(jisx0213_bmp, code, c) {
if (code == MULTIC) {
if (inleft < 2) {
if (flags & MBENC_FLUSH) {
code = find_pairencmap
((ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
return MBERR_TOOFEW;
}
else {
code = find_pairencmap(
(ucs2_t)c, IN2,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
insize = 2;
}
}
}
else TRYMAP_ENC(jisxcommon, code, c) {
/* abandon JIS X 0212 codes */
if (code & 0x8000)
return 1;
}
else return 1;
}
else if (c >> 16 == EMPBASE >> 16) {
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
else return insize;
}
else
return insize;
}
c1 = code >> 8;
c2 = (code & 0xff) - 0x21;
if (c1 & 0x80) { /* Plane 2 */
if (c1 >= 0xee) c1 -= 0x87;
else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
else c1 -= 0x43;
}
else /* Plane 1 */
c1 -= 0x21;
if (c1 & 1) c2 += 0x5e;
c1 >>= 1;
OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
NEXT(insize, 2)
}
return 0;
}
DECODER(shift_jis_2004)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
JISX0201_DECODE(c, **outbuf)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
unsigned char c1, c2;
ucs4_t code;
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 2;
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
if (c1 < 0x5e) { /* Plane 1 */
c1 += 0x21;
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
c1, c2)
else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
NEXT_OUT(1)
}
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
c1, c2) {
NEXT_OUT(1)
}
else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
WRITEUCS4(EMPBASE | code)
}
else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
WRITE2(code >> 16, code & 0xffff)
NEXT_OUT(2)
}
else
return 2;
NEXT_IN(2)
}
else { /* Plane 2 */
if (c1 >= 0x67) c1 += 0x07;
else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
else c1 -= 0x3d;
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
c1, c2)
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
c1, c2) ;
else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
WRITEUCS4(EMPBASE | code)
NEXT_IN(2)
continue;
}
else
return 2;
NEXT(2, 1)
}
continue;
}
else
return 2;
NEXT(1, 1) /* JIS X 0201 */
}
return 0;
}
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(jisx0208)
MAPPING_DECONLY(jisx0212)
MAPPING_ENCONLY(jisxcommon)
MAPPING_DECONLY(jisx0213_1_bmp)
MAPPING_DECONLY(jisx0213_2_bmp)
MAPPING_ENCONLY(jisx0213_bmp)
MAPPING_DECONLY(jisx0213_1_emp)
MAPPING_DECONLY(jisx0213_2_emp)
MAPPING_ENCONLY(jisx0213_emp)
MAPPING_ENCDEC(jisx0213_pair)
MAPPING_ENCDEC(cp932ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(shift_jis)
CODEC_STATELESS(cp932)
CODEC_STATELESS(euc_jp)
CODEC_STATELESS(shift_jis_2004)
CODEC_STATELESS(euc_jis_2004)
{ "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
{ "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
END_CODECS_LIST
I_AM_A_MODULE_FOR(jp)

View File

@@ -0,0 +1,452 @@
/*
* _codecs_kr.c: Codecs collection for Korean encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#include "cjkcodecs.h"
#include "mappings_kr.h"
/*
* EUC-KR codec
*/
#define EUCKR_JAMO_FIRSTBYTE 0xA4
#define EUCKR_JAMO_FILLER 0xD4
static const unsigned char u2cgk_choseong[19] = {
0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
0xbc, 0xbd, 0xbe
};
static const unsigned char u2cgk_jungseong[21] = {
0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
0xcf, 0xd0, 0xd1, 0xd2, 0xd3
};
static const unsigned char u2cgk_jongseong[28] = {
0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
0xbb, 0xbc, 0xbd, 0xbe
};
ENCODER(euc_kr)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c);
else return 1;
if ((code & 0x8000) == 0) {
/* KS X 1001 coded character */
OUT1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80)
NEXT(1, 2)
}
else { /* Mapping is found in CP949 extension,
* but we encode it in KS X 1001:1998 Annex 3,
* make-up sequence for EUC-KR. */
REQUIRE_OUTBUF(8)
/* syllable composition precedence */
OUT1(EUCKR_JAMO_FIRSTBYTE)
OUT2(EUCKR_JAMO_FILLER)
/* All codepoints in CP949 extension are in unicode
* Hangul Syllable area. */
assert(0xac00 <= c && c <= 0xd7a3);
c -= 0xac00;
OUT3(EUCKR_JAMO_FIRSTBYTE)
OUT4(u2cgk_choseong[c / 588])
NEXT_OUT(4)
OUT1(EUCKR_JAMO_FIRSTBYTE)
OUT2(u2cgk_jungseong[(c / 28) % 21])
OUT3(EUCKR_JAMO_FIRSTBYTE)
OUT4(u2cgk_jongseong[c % 28])
NEXT(1, 4)
}
}
return 0;
}
#define NONE 127
static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
0, 1, NONE, 2, NONE, NONE, 3, 4,
5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
6, 7, 8, NONE, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18
};
static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
1, 2, 3, 4, 5, 6, 7, NONE,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, NONE, 18, 19, 20, 21, 22,
NONE, 23, 24, 25, 26, 27
};
DECODER(euc_kr)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
if (c == EUCKR_JAMO_FIRSTBYTE &&
IN2 == EUCKR_JAMO_FILLER) {
/* KS X 1001:1998 Annex 3 make-up sequence */
DBCHAR cho, jung, jong;
REQUIRE_INBUF(8)
if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
return 8;
c = (*inbuf)[3];
if (0xa1 <= c && c <= 0xbe)
cho = cgk2u_choseong[c - 0xa1];
else
cho = NONE;
c = (*inbuf)[5];
jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
c = (*inbuf)[7];
if (c == EUCKR_JAMO_FILLER)
jong = 0;
else if (0xa1 <= c && c <= 0xbe)
jong = cgk2u_jongseong[c - 0xa1];
else
jong = NONE;
if (cho == NONE || jung == NONE || jong == NONE)
return 8;
OUT1(0xac00 + cho*588 + jung*28 + jong);
NEXT(8, 1)
}
else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
}
else
return 2;
}
return 0;
}
#undef NONE
/*
* CP949 codec
*/
ENCODER(cp949)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c);
else return 1;
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2(code & 0xFF) /* MSB set: CP949 */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
NEXT(1, 2)
}
return 0;
}
DECODER(cp949)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
* JOHAB codec
*/
static const unsigned char u2johabidx_choseong[32] = {
0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14,
};
static const unsigned char u2johabidx_jungseong[32] = {
0x03, 0x04, 0x05, 0x06, 0x07,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x1a, 0x1b, 0x1c, 0x1d,
};
static const unsigned char u2johabidx_jongseong[32] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
};
static const DBCHAR u2johabjamo[] = {
0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
0x8741, 0x8761, 0x8781, 0x87a1,
};
ENCODER(johab)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
if (c >= 0xac00 && c <= 0xd7a3) {
c -= 0xac00;
code = 0x8000 |
(u2johabidx_choseong[c / 588] << 10) |
(u2johabidx_jungseong[(c / 28) % 21] << 5) |
u2johabidx_jongseong[c % 28];
}
else if (c >= 0x3131 && c <= 0x3163)
code = u2johabjamo[c - 0x3131];
else TRYMAP_ENC(cp949, code, c) {
unsigned char c1, c2, t2;
unsigned short t1;
assert((code & 0x8000) == 0);
c1 = code >> 8;
c2 = code & 0xff;
if (((c1 >= 0x21 && c1 <= 0x2c) ||
(c1 >= 0x4a && c1 <= 0x7d)) &&
(c2 >= 0x21 && c2 <= 0x7e)) {
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
(c1 - 0x21 + 0x197));
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
OUT1(t1 >> 1)
OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
NEXT(1, 2)
continue;
}
else
return 1;
}
else
return 1;
OUT1(code >> 8)
OUT2(code & 0xff)
NEXT(1, 2)
}
return 0;
}
#define FILL 0xfd
#define NONE 0xff
static const unsigned char johabidx_choseong[32] = {
NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabidx_jungseong[32] = {
NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
static const unsigned char johabidx_jongseong[32] = {
NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};
static const unsigned char johabjamo_choseong[32] = {
NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabjamo_jungseong[32] = {
NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
static const unsigned char johabjamo_jongseong[32] = {
NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};
DECODER(johab)
{
while (inleft > 0) {
unsigned char c = IN1, c2;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
c2 = IN2;
if (c < 0xd8) {
/* johab hangul */
unsigned char c_cho, c_jung, c_jong;
unsigned char i_cho, i_jung, i_jong;
c_cho = (c >> 2) & 0x1f;
c_jung = ((c << 3) | c2 >> 5) & 0x1f;
c_jong = c2 & 0x1f;
i_cho = johabidx_choseong[c_cho];
i_jung = johabidx_jungseong[c_jung];
i_jong = johabidx_jongseong[c_jong];
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
return 2;
/* we don't use U+1100 hangul jamo yet. */
if (i_cho == FILL) {
if (i_jung == FILL) {
if (i_jong == FILL)
OUT1(0x3000)
else
OUT1(0x3100 |
johabjamo_jongseong[c_jong])
}
else {
if (i_jong == FILL)
OUT1(0x3100 |
johabjamo_jungseong[c_jung])
else
return 2;
}
} else {
if (i_jung == FILL) {
if (i_jong == FILL)
OUT1(0x3100 |
johabjamo_choseong[c_cho])
else
return 2;
}
else
OUT1(0xac00 +
i_cho * 588 +
i_jung * 28 +
(i_jong == FILL ? 0 : i_jong))
}
NEXT(2, 1)
} else {
/* KS X 1001 except hangul jamos and syllables */
if (c == 0xdf || c > 0xf9 ||
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
(c2 & 0x7f) == 0x7f ||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
return 2;
else {
unsigned char t1, t2;
t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
2 * c - 0x197);
t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
else return 2;
NEXT(2, 1)
}
}
}
return 0;
}
#undef NONE
#undef FILL
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(ksx1001)
MAPPING_ENCONLY(cp949)
MAPPING_DECONLY(cp949ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(euc_kr)
CODEC_STATELESS(cp949)
CODEC_STATELESS(johab)
END_CODECS_LIST
I_AM_A_MODULE_FOR(kr)

View File

@@ -0,0 +1,132 @@
/*
* _codecs_tw.c: Codecs collection for Taiwan's encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#include "cjkcodecs.h"
#include "mappings_tw.h"
/*
* BIG5 codec
*/
ENCODER(big5)
{
while (inleft > 0) {
Py_UNICODE c = **inbuf;
DBCHAR code;
if (c < 0x80) {
REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c;
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(big5, code, c);
else return 1;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(1, 2)
}
return 0;
}
DECODER(big5)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
}
else return 2;
}
return 0;
}
/*
* CP950 codec
*/
ENCODER(cp950)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp950ext, code, c);
else TRYMAP_ENC(big5, code, c);
else return 1;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(1, 2)
}
return 0;
}
DECODER(cp950)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
else TRYMAP_DEC(big5, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
BEGIN_MAPPINGS_LIST
MAPPING_ENCDEC(big5)
MAPPING_ENCDEC(cp950ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(big5)
CODEC_STATELESS(cp950)
END_CODECS_LIST
I_AM_A_MODULE_FOR(tw)

View File

@@ -0,0 +1,24 @@
#define JISX0201_R_ENCODE(c, assi) \
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
(assi) = (c); \
else if ((c) == 0x00a5) (assi) = 0x5c; \
else if ((c) == 0x203e) (assi) = 0x7e;
#define JISX0201_K_ENCODE(c, assi) \
if ((c) >= 0xff61 && (c) <= 0xff9f) \
(assi) = (c) - 0xfec0;
#define JISX0201_ENCODE(c, assi) \
JISX0201_R_ENCODE(c, assi) \
else JISX0201_K_ENCODE(c, assi)
#define JISX0201_R_DECODE(c, assi) \
if ((c) < 0x5c) (assi) = (c); \
else if ((c) == 0x5c) (assi) = 0x00a5; \
else if ((c) < 0x7e) (assi) = (c); \
else if ((c) == 0x7e) (assi) = 0x203e; \
else if ((c) == 0x7f) (assi) = 0x7f;
#define JISX0201_K_DECODE(c, assi) \
if ((c) >= 0xa1 && (c) <= 0xdf) \
(assi) = 0xfec0 + (c);
#define JISX0201_DECODE(c, assi) \
JISX0201_R_DECODE(c, assi) \
else JISX0201_K_DECODE(c, assi)

View File

@@ -0,0 +1,398 @@
/*
* cjkcodecs.h: common header for cjkcodecs
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#ifndef _CJKCODECS_H_
#define _CJKCODECS_H_
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "multibytecodec.h"
/* a unicode "undefined" codepoint */
#define UNIINV 0xFFFE
/* internal-use DBCS codepoints which aren't used by any charsets */
#define NOCHAR 0xFFFF
#define MULTIC 0xFFFE
#define DBCINV 0xFFFD
/* shorter macros to save source size of mapping tables */
#define U UNIINV
#define N NOCHAR
#define M MULTIC
#define D DBCINV
struct dbcs_index {
const ucs2_t *map;
unsigned char bottom, top;
};
typedef struct dbcs_index decode_map;
struct widedbcs_index {
const ucs4_t *map;
unsigned char bottom, top;
};
typedef struct widedbcs_index widedecode_map;
struct unim_index {
const DBCHAR *map;
unsigned char bottom, top;
};
typedef struct unim_index encode_map;
struct unim_index_bytebased {
const unsigned char *map;
unsigned char bottom, top;
};
struct dbcs_map {
const char *charset;
const struct unim_index *encmap;
const struct dbcs_index *decmap;
};
struct pair_encodemap {
ucs4_t uniseq;
DBCHAR code;
};
static const MultibyteCodec *codec_list;
static const struct dbcs_map *mapping_list;
#define CODEC_INIT(encoding) \
static int encoding##_codec_init(const void *config)
#define ENCODER_INIT(encoding) \
static int encoding##_encode_init( \
MultibyteCodec_State *state, const void *config)
#define ENCODER(encoding) \
static Py_ssize_t encoding##_encode( \
MultibyteCodec_State *state, const void *config, \
const Py_UNICODE **inbuf, Py_ssize_t inleft, \
unsigned char **outbuf, Py_ssize_t outleft, int flags)
#define ENCODER_RESET(encoding) \
static Py_ssize_t encoding##_encode_reset( \
MultibyteCodec_State *state, const void *config, \
unsigned char **outbuf, Py_ssize_t outleft)
#define DECODER_INIT(encoding) \
static int encoding##_decode_init( \
MultibyteCodec_State *state, const void *config)
#define DECODER(encoding) \
static Py_ssize_t encoding##_decode( \
MultibyteCodec_State *state, const void *config, \
const unsigned char **inbuf, Py_ssize_t inleft, \
Py_UNICODE **outbuf, Py_ssize_t outleft)
#define DECODER_RESET(encoding) \
static Py_ssize_t encoding##_decode_reset( \
MultibyteCodec_State *state, const void *config)
#if Py_UNICODE_SIZE == 4
#define UCS4INVALID(code) \
if ((code) > 0xFFFF) \
return 1;
#else
#define UCS4INVALID(code) \
if (0) ;
#endif
#define NEXT_IN(i) \
(*inbuf) += (i); \
(inleft) -= (i);
#define NEXT_OUT(o) \
(*outbuf) += (o); \
(outleft) -= (o);
#define NEXT(i, o) \
NEXT_IN(i) NEXT_OUT(o)
#define REQUIRE_INBUF(n) \
if (inleft < (n)) \
return MBERR_TOOFEW;
#define REQUIRE_OUTBUF(n) \
if (outleft < (n)) \
return MBERR_TOOSMALL;
#define IN1 ((*inbuf)[0])
#define IN2 ((*inbuf)[1])
#define IN3 ((*inbuf)[2])
#define IN4 ((*inbuf)[3])
#define OUT1(c) ((*outbuf)[0]) = (c);
#define OUT2(c) ((*outbuf)[1]) = (c);
#define OUT3(c) ((*outbuf)[2]) = (c);
#define OUT4(c) ((*outbuf)[3]) = (c);
#define WRITE1(c1) \
REQUIRE_OUTBUF(1) \
(*outbuf)[0] = (c1);
#define WRITE2(c1, c2) \
REQUIRE_OUTBUF(2) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2);
#define WRITE3(c1, c2, c3) \
REQUIRE_OUTBUF(3) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3);
#define WRITE4(c1, c2, c3, c4) \
REQUIRE_OUTBUF(4) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3); \
(*outbuf)[3] = (c4);
#if Py_UNICODE_SIZE == 2
# define WRITEUCS4(c) \
REQUIRE_OUTBUF(2) \
(*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
(*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
NEXT_OUT(2)
#else
# define WRITEUCS4(c) \
REQUIRE_OUTBUF(1) \
**outbuf = (Py_UNICODE)(c); \
NEXT_OUT(1)
#endif
#define _TRYMAP_ENC(m, assi, val) \
((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
(m)->bottom]) != NOCHAR)
#define TRYMAP_ENC_COND(charset, assi, uni) \
_TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
#define TRYMAP_ENC(charset, assi, uni) \
if TRYMAP_ENC_COND(charset, assi, uni)
#define _TRYMAP_DEC(m, assi, val) \
((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
(m)->bottom]) != UNIINV)
#define TRYMAP_DEC(charset, assi, c1, c2) \
if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && \
((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
(((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
(((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
assplane, asshi, asslo, (uni) & 0xff)
#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c) \
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
REQUIRE_INBUF(2) \
if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
((ucs4_t)(IN2) - 0xdc00); \
} \
}
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
#else
#define DECODE_SURROGATE(c) {;}
#define GET_INSIZE(c) 1
#endif
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
#define END_MAPPINGS_LIST \
{"", NULL, NULL} }; \
static const struct dbcs_map *mapping_list = \
(const struct dbcs_map *)_mapping_list;
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
#define _STATEFUL_METHODS(enc) \
enc##_encode, \
enc##_encode_init, \
enc##_encode_reset, \
enc##_decode, \
enc##_decode_init, \
enc##_decode_reset,
#define _STATELESS_METHODS(enc) \
enc##_encode, NULL, NULL, \
enc##_decode, NULL, NULL,
#define CODEC_STATEFUL(enc) { \
#enc, NULL, NULL, \
_STATEFUL_METHODS(enc) \
},
#define CODEC_STATELESS(enc) { \
#enc, NULL, NULL, \
_STATELESS_METHODS(enc) \
},
#define CODEC_STATELESS_WINIT(enc) { \
#enc, NULL, \
enc##_codec_init, \
_STATELESS_METHODS(enc) \
},
#define END_CODECS_LIST \
{"", NULL,} }; \
static const MultibyteCodec *codec_list = \
(const MultibyteCodec *)_codec_list;
static PyObject *
getmultibytecodec(void)
{
static PyObject *cofunc = NULL;
if (cofunc == NULL) {
PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec");
if (mod == NULL)
return NULL;
cofunc = PyObject_GetAttrString(mod, "__create_codec");
Py_DECREF(mod);
}
return cofunc;
}
static PyObject *
getcodec(PyObject *self, PyObject *encoding)
{
PyObject *codecobj, *r, *cofunc;
const MultibyteCodec *codec;
const char *enc;
if (!PyString_Check(encoding)) {
PyErr_SetString(PyExc_TypeError,
"encoding name must be a string.");
return NULL;
}
cofunc = getmultibytecodec();
if (cofunc == NULL)
return NULL;
enc = PyString_AS_STRING(encoding);
for (codec = codec_list; codec->encoding[0]; codec++)
if (strcmp(codec->encoding, enc) == 0)
break;
if (codec->encoding[0] == '\0') {
PyErr_SetString(PyExc_LookupError,
"no such codec is supported.");
return NULL;
}
codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL);
if (codecobj == NULL)
return NULL;
r = PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL);
Py_DECREF(codecobj);
return r;
}
static struct PyMethodDef __methods[] = {
{"getcodec", (PyCFunction)getcodec, METH_O, ""},
{NULL, NULL},
};
static int
register_maps(PyObject *module)
{
const struct dbcs_map *h;
for (h = mapping_list; h->charset[0] != '\0'; h++) {
char mhname[256] = "__map_";
int r;
strcpy(mhname + sizeof("__map_") - 1, h->charset);
r = PyModule_AddObject(module, mhname,
PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL));
if (r == -1)
return -1;
}
return 0;
}
#ifdef USING_BINARY_PAIR_SEARCH
static DBCHAR
find_pairencmap(ucs2_t body, ucs2_t modifier,
const struct pair_encodemap *haystack, int haystacksize)
{
int pos, min, max;
ucs4_t value = body << 16 | modifier;
min = 0;
max = haystacksize;
for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1)
if (value < haystack[pos].uniseq) {
if (max == pos) break;
else max = pos;
}
else if (value > haystack[pos].uniseq) {
if (min == pos) break;
else min = pos;
}
else
break;
if (value == haystack[pos].uniseq)
return haystack[pos].code;
else
return DBCINV;
}
#endif
#ifdef USING_IMPORTED_MAPS
#define IMPORT_MAP(locale, charset, encmap, decmap) \
importmap("_codecs_" #locale, "__map_" #charset, \
(const void**)encmap, (const void**)decmap)
static int
importmap(const char *modname, const char *symbol,
const void **encmap, const void **decmap)
{
PyObject *o, *mod;
mod = PyImport_ImportModule((char *)modname);
if (mod == NULL)
return -1;
o = PyObject_GetAttrString(mod, (char*)symbol);
if (o == NULL)
goto errorexit;
else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) {
PyErr_SetString(PyExc_ValueError,
"map data must be a Capsule.");
goto errorexit;
}
else {
struct dbcs_map *map;
map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME);
if (encmap != NULL)
*encmap = map->encmap;
if (decmap != NULL)
*decmap = map->decmap;
Py_DECREF(o);
}
Py_DECREF(mod);
return 0;
errorexit:
Py_DECREF(mod);
return -1;
}
#endif
#define I_AM_A_MODULE_FOR(loc) \
void \
init_codecs_##loc(void) \
{ \
PyObject *m = Py_InitModule("_codecs_" #loc, __methods);\
if (m != NULL) \
(void)register_maps(m); \
}
#endif

View File

@@ -0,0 +1,43 @@
/* These routines may be quite inefficient, but it's used only to emulate old
* standards. */
#ifndef EMULATE_JISX0213_2000_ENCODE_INVALID
#define EMULATE_JISX0213_2000_ENCODE_INVALID 1
#endif
#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \
if (config == (void *)2000 && ( \
(c) == 0x9B1C || (c) == 0x4FF1 || \
(c) == 0x525D || (c) == 0x541E || \
(c) == 0x5653 || (c) == 0x59F8 || \
(c) == 0x5C5B || (c) == 0x5E77 || \
(c) == 0x7626 || (c) == 0x7E6B)) \
return EMULATE_JISX0213_2000_ENCODE_INVALID; \
else if (config == (void *)2000 && (c) == 0x9B1D) \
(assi) = 0x8000 | 0x7d3b; \
#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c) \
if (config == (void *)2000 && (c) == 0x20B9F) \
return EMULATE_JISX0213_2000_ENCODE_INVALID;
#ifndef EMULATE_JISX0213_2000_DECODE_INVALID
#define EMULATE_JISX0213_2000_DECODE_INVALID 2
#endif
#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2) \
if (config == (void *)2000 && \
(((c1) == 0x2E && (c2) == 0x21) || \
((c1) == 0x2F && (c2) == 0x7E) || \
((c1) == 0x4F && (c2) == 0x54) || \
((c1) == 0x4F && (c2) == 0x7E) || \
((c1) == 0x74 && (c2) == 0x27) || \
((c1) == 0x7E && (c2) == 0x7A) || \
((c1) == 0x7E && (c2) == 0x7B) || \
((c1) == 0x7E && (c2) == 0x7C) || \
((c1) == 0x7E && (c2) == 0x7D) || \
((c1) == 0x7E && (c2) == 0x7E))) \
return EMULATE_JISX0213_2000_DECODE_INVALID;
#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \
if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
(assi) = 0x9B1D;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,59 @@
#define JISX0213_ENCPAIRS 46
#ifdef EXTERN_JISX0213_PAIR
static const struct widedbcs_index *jisx0213_pair_decmap;
static const struct pair_encodemap *jisx0213_pair_encmap;
#else
static const ucs4_t __jisx0213_pair_decmap[49] = {
810234010,810365082,810496154,810627226,810758298,816525466,816656538,
816787610,816918682,817049754,817574042,818163866,818426010,838283418,
15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,
39453441,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,48825061,48562921,
};
static const struct widedbcs_index jisx0213_pair_decmap[256] = {
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap
+0,119,123},{__jisx0213_pair_decmap+5,119,126},{__jisx0213_pair_decmap+13,120,
120},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap+14,68,102},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
};
static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {
{0x00e60000,0x295c},{0x00e60300,0x2b44},{0x02540000,0x2b38},{0x02540300,0x2b48
},{0x02540301,0x2b49},{0x02590000,0x2b30},{0x02590300,0x2b4c},{0x02590301,
0x2b4d},{0x025a0000,0x2b43},{0x025a0300,0x2b4e},{0x025a0301,0x2b4f},{
0x028c0000,0x2b37},{0x028c0300,0x2b4a},{0x028c0301,0x2b4b},{0x02e50000,0x2b60
},{0x02e502e9,0x2b66},{0x02e90000,0x2b64},{0x02e902e5,0x2b65},{0x304b0000,
0x242b},{0x304b309a,0x2477},{0x304d0000,0x242d},{0x304d309a,0x2478},{
0x304f0000,0x242f},{0x304f309a,0x2479},{0x30510000,0x2431},{0x3051309a,0x247a
},{0x30530000,0x2433},{0x3053309a,0x247b},{0x30ab0000,0x252b},{0x30ab309a,
0x2577},{0x30ad0000,0x252d},{0x30ad309a,0x2578},{0x30af0000,0x252f},{
0x30af309a,0x2579},{0x30b10000,0x2531},{0x30b1309a,0x257a},{0x30b30000,0x2533
},{0x30b3309a,0x257b},{0x30bb0000,0x253b},{0x30bb309a,0x257c},{0x30c40000,
0x2544},{0x30c4309a,0x257d},{0x30c80000,0x2548},{0x30c8309a,0x257e},{
0x31f70000,0x2675},{0x31f7309a,0x2678},
};
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,141 @@
/*
* multibytecodec.h: Common Multibyte Codec Implementation
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#ifndef _PYTHON_MULTIBYTECODEC_H_
#define _PYTHON_MULTIBYTECODEC_H_
#ifdef __cplusplus
extern "C" {
#endif
#ifdef uint32_t
typedef uint32_t ucs4_t;
#else
typedef unsigned int ucs4_t;
#endif
#ifdef uint16_t
typedef uint16_t ucs2_t, DBCHAR;
#else
typedef unsigned short ucs2_t, DBCHAR;
#endif
typedef union {
void *p;
int i;
unsigned char c[8];
ucs2_t u2[4];
ucs4_t u4[2];
} MultibyteCodec_State;
typedef int (*mbcodec_init)(const void *config);
typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state,
const void *config,
const Py_UNICODE **inbuf, Py_ssize_t inleft,
unsigned char **outbuf, Py_ssize_t outleft,
int flags);
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
const void *config);
typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state,
const void *config,
unsigned char **outbuf, Py_ssize_t outleft);
typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
const void *config,
const unsigned char **inbuf, Py_ssize_t inleft,
Py_UNICODE **outbuf, Py_ssize_t outleft);
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
const void *config);
typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
const void *config);
typedef struct {
const char *encoding;
const void *config;
mbcodec_init codecinit;
mbencode_func encode;
mbencodeinit_func encinit;
mbencodereset_func encreset;
mbdecode_func decode;
mbdecodeinit_func decinit;
mbdecodereset_func decreset;
} MultibyteCodec;
typedef struct {
PyObject_HEAD
MultibyteCodec *codec;
} MultibyteCodecObject;
#define MultibyteCodec_Check(op) ((op)->ob_type == &MultibyteCodec_Type)
#define _MultibyteStatefulCodec_HEAD \
PyObject_HEAD \
MultibyteCodec *codec; \
MultibyteCodec_State state; \
PyObject *errors;
typedef struct {
_MultibyteStatefulCodec_HEAD
} MultibyteStatefulCodecContext;
#define MAXENCPENDING 2
#define _MultibyteStatefulEncoder_HEAD \
_MultibyteStatefulCodec_HEAD \
Py_UNICODE pending[MAXENCPENDING]; \
Py_ssize_t pendingsize;
typedef struct {
_MultibyteStatefulEncoder_HEAD
} MultibyteStatefulEncoderContext;
#define MAXDECPENDING 8
#define _MultibyteStatefulDecoder_HEAD \
_MultibyteStatefulCodec_HEAD \
unsigned char pending[MAXDECPENDING]; \
Py_ssize_t pendingsize;
typedef struct {
_MultibyteStatefulDecoder_HEAD
} MultibyteStatefulDecoderContext;
typedef struct {
_MultibyteStatefulEncoder_HEAD
} MultibyteIncrementalEncoderObject;
typedef struct {
_MultibyteStatefulDecoder_HEAD
} MultibyteIncrementalDecoderObject;
typedef struct {
_MultibyteStatefulDecoder_HEAD
PyObject *stream;
} MultibyteStreamReaderObject;
typedef struct {
_MultibyteStatefulEncoder_HEAD
PyObject *stream;
} MultibyteStreamWriterObject;
/* positive values for illegal sequences */
#define MBERR_TOOSMALL (-1) /* insufficient output buffer space */
#define MBERR_TOOFEW (-2) /* incomplete input buffer */
#define MBERR_INTERNAL (-3) /* internal runtime error */
#define ERROR_STRICT (PyObject *)(1)
#define ERROR_IGNORE (PyObject *)(2)
#define ERROR_REPLACE (PyObject *)(3)
#define ERROR_ISCUSTOM(p) ((p) < ERROR_STRICT || ERROR_REPLACE < (p))
#define ERROR_DECREF(p) do { \
if (p != NULL && ERROR_ISCUSTOM(p)) { \
Py_DECREF(p); \
} \
} while (0);
#define MBENC_FLUSH 0x0001 /* encode all characters encodable */
#define MBENC_MAX MBENC_FLUSH
#define PyMultibyteCodec_CAPSULE_NAME "multibytecodec.__map_*"
#ifdef __cplusplus
}
#endif
#endif